All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.solr.handler.component.QueryElevationComponent Maven / Gradle / Ivy

There is a newer version: 9.6.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import java.util.SortedSet;
import java.util.WeakHashMap;
import java.util.function.Consumer;

import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.ObjectArrays;
import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SimpleFieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.QueryElevationParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.XmlConfigFile;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.transform.ElevatedMarkerFactory;
import org.apache.solr.response.transform.ExcludedMarkerFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * A component to elevate some documents to the top of the result set.
 *
 * @since solr 1.3
 */
@SuppressWarnings("WeakerAccess")
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  // Constants used in solrconfig.xml
  /** Init argument naming the field type whose query analyzer is assigned to {@link #queryAnalyzer}. */
  @VisibleForTesting
  static final String FIELD_TYPE = "queryFieldType";
  /** Init argument naming the elevation configuration file. */
  @VisibleForTesting
  static final String CONFIG_FILE = "config-file";
  /** Attribute read on each {@code doc} node of the elevation configuration to mark the id as excluded. */
  private static final String EXCLUDE = "exclude";

  /**
   * Key to the request context under which the docId-to-priority map computed by
   * {@code getBoostDocs} is cached.
   *
   * @see #getBoostDocs(SolrIndexSearcher, Set, Map)
   */
  private static final String BOOSTED_DOCIDS = "BOOSTED_DOCIDS";

  /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of included IDs in configured
   * order (so-called priority). */
  public static final String BOOSTED = "BOOSTED";
  /** Key to {@link SolrQueryRequest#getContext()} for a {@code Set} of excluded IDs. */
  public static final String EXCLUDED = "EXCLUDED";

  // Values used when the corresponding init argument or attribute is absent.
  private static final boolean DEFAULT_FORCE_ELEVATION = false;
  private static final boolean DEFAULT_USE_CONFIGURED_ELEVATED_ORDER = true;
  private static final boolean DEFAULT_SUBSET_MATCH = false;
  private static final String DEFAULT_EXCLUDE_MARKER_FIELD_NAME = "excluded";
  private static final String DEFAULT_EDITORIAL_MARKER_FIELD_NAME = "elevated";

  /** Init arguments of this component, captured in {@link #init(NamedList)}. */
  protected SolrParams initArgs;
  /** Query analyzer of the configured field type; only assigned when {@link #FIELD_TYPE} is set. */
  protected Analyzer queryAnalyzer;
  /** Unique key field of the schema, resolved in {@link #inform(SolrCore)}. */
  protected SchemaField uniqueKeyField;
  /** @see QueryElevationParams#FORCE_ELEVATION */
  protected boolean forceElevation;
  /** @see QueryElevationParams#USE_CONFIGURED_ELEVATED_ORDER */
  protected boolean useConfiguredElevatedOrder;

  /**
   * Set to {@code true} at the end of a successful {@link #inform(SolrCore)};
   * {@link #prepare(ResponseBuilder)} returns immediately while it is {@code false}.
   */
  protected boolean initialized;

  /**
   * For each {@link IndexReader}, keep an {@link ElevationProvider} when the configuration is loaded from the
   * data directory. The key is {@code null} if loaded from the config directory, and that entry is not
   * re-loaded per reader.
   * <p>
   * Every access in this class happens inside a {@code synchronized (elevationProviderCache)} block.
   * The type arguments are declared explicitly so that lookups yield an {@link ElevationProvider}
   * without casts.
   */
  private final Map<IndexReader, ElevationProvider> elevationProviderCache = new WeakHashMap<>();

  /**
   * Captures the component's init arguments as {@link SolrParams} for later use by the
   * configuration parsing methods.
   *
   * @param args The init arguments of this component.
   */
  @Override
  public void init(NamedList args) {
    final SolrParams componentArgs = args.toSolrParams();
    this.initArgs = componentArgs;
  }

  /**
   * Initializes this component against the provided core: parses the init arguments, registers the
   * marker transformer factories and loads the elevation configuration.
   * <p>
   * {@link #initialized} only becomes {@code true} when every step succeeded. Any exception is
   * delegated to {@link #handleInitializationException(Exception, InitializationExceptionCause)}.
   *
   * @param core The core holding this component.
   */
  @Override
  public void inform(SolrCore core) {
    initialized = false;
    try {
      parseFieldType(core);
      setUniqueKeyField(core);
      parseExcludedMarkerFieldName(core);
      parseEditorialMarkerFieldName(core);
      parseForceElevation();
      parseUseConfiguredOrderForElevations();
      loadElevationConfiguration(core);
      initialized = true;
    } catch (Exception failure) {
      assert !initialized;
      // An InitializationException carries its own cause; anything else is reported as OTHER.
      final InitializationExceptionCause cause = (failure instanceof InitializationException)
          ? ((InitializationException) failure).exceptionCause
          : InitializationExceptionCause.OTHER;
      handleInitializationException(failure, cause);
    }
  }

  /**
   * Resolves the optional {@link #FIELD_TYPE} init argument and stores the query analyzer of that
   * field type in {@link #queryAnalyzer}. Does nothing when the argument is absent.
   *
   * @param core The core whose latest schema is searched for the field type.
   * @throws InitializationException If the argument names a field type unknown to the schema.
   */
  private void parseFieldType(SolrCore core) throws InitializationException {
    final String fieldTypeName = initArgs.get(FIELD_TYPE);
    if (fieldTypeName == null) {
      // Nothing configured: queryAnalyzer is left untouched.
      return;
    }
    final FieldType fieldType = core.getLatestSchema().getFieldTypes().get(fieldTypeName);
    if (fieldType == null) {
      throw new InitializationException("Parameter " + FIELD_TYPE + " defines an unknown field type \"" + fieldTypeName + "\"", InitializationExceptionCause.UNKNOWN_FIELD_TYPE);
    }
    queryAnalyzer = fieldType.getQueryAnalyzer();
  }

  /**
   * Reads the unique key field from the latest schema of the core into {@link #uniqueKeyField}.
   *
   * @param core The core holding this component.
   * @throws InitializationException If the schema does not define a unique key field.
   */
  private void setUniqueKeyField(SolrCore core) throws InitializationException {
    final SchemaField schemaKeyField = core.getLatestSchema().getUniqueKeyField();
    // Assigned before the check, so the field is null when the exception below is thrown.
    uniqueKeyField = schemaKeyField;
    if (schemaKeyField != null) {
      return;
    }
    throw new InitializationException("This component requires the schema to have a uniqueKeyField", InitializationExceptionCause.MISSING_UNIQUE_KEY_FIELD);
  }

  /**
   * Registers an {@link ExcludedMarkerFactory} on the core under the name given by the
   * {@link QueryElevationParams#EXCLUDE_MARKER_FIELD_NAME} init argument, or under the default name.
   *
   * @param core The core on which the transformer factory is registered.
   */
  private void parseExcludedMarkerFieldName(SolrCore core) {
    final String transformerName =
        initArgs.get(QueryElevationParams.EXCLUDE_MARKER_FIELD_NAME, DEFAULT_EXCLUDE_MARKER_FIELD_NAME);
    final ExcludedMarkerFactory markerFactory = new ExcludedMarkerFactory();
    core.addTransformerFactory(transformerName, markerFactory);
  }

  /**
   * Registers an {@link ElevatedMarkerFactory} on the core under the name given by the
   * {@link QueryElevationParams#EDITORIAL_MARKER_FIELD_NAME} init argument, or under the default name.
   *
   * @param core The core on which the transformer factory is registered.
   */
  private void parseEditorialMarkerFieldName(SolrCore core) {
    final String transformerName =
        initArgs.get(QueryElevationParams.EDITORIAL_MARKER_FIELD_NAME, DEFAULT_EDITORIAL_MARKER_FIELD_NAME);
    final ElevatedMarkerFactory markerFactory = new ElevatedMarkerFactory();
    core.addTransformerFactory(transformerName, markerFactory);
  }

  /**
   * Reads the {@link QueryElevationParams#FORCE_ELEVATION} init argument into {@link #forceElevation},
   * falling back to the default when the argument is absent.
   */
  private void parseForceElevation() {
    final boolean configuredValue =
        initArgs.getBool(QueryElevationParams.FORCE_ELEVATION, DEFAULT_FORCE_ELEVATION);
    forceElevation = configuredValue;
  }

  /**
   * Reads the {@link QueryElevationParams#USE_CONFIGURED_ELEVATED_ORDER} init argument into
   * {@link #useConfiguredElevatedOrder}, falling back to the default when the argument is absent.
   */
  private void parseUseConfiguredOrderForElevations() {
    final boolean configuredValue =
        initArgs.getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, DEFAULT_USE_CONFIGURED_ELEVATED_ORDER);
    useConfiguredElevatedOrder = configuredValue;
  }

  /**
   * (Re)Loads elevation configuration.
   * <p>
   * The provider cache is cleared first. Without ZooKeeper, the configuration file must exist in exactly
   * one of the config directory or the data directory. A file found in the config directory is loaded
   * immediately and cached under the {@code null} key. Otherwise the configuration is pre-loaded for the
   * reader of the newest searcher, when one is available.
   *
   * @param core The core holding this component.
   * @return The number of elevation rules parsed.
   * @throws InitializationException If the {@link #CONFIG_FILE} parameter is missing, or if a file
   *                                 access issue is not absorbed by
   *                                 {@link #handleConfigLoadingException(Exception, boolean)}.
   * @throws Exception               If the configuration cannot be loaded.
   */
  protected int loadElevationConfiguration(SolrCore core) throws Exception {
    synchronized (elevationProviderCache) {
      elevationProviderCache.clear();
      String configFileName = initArgs.get(CONFIG_FILE);
      if (configFileName == null) {
        // Throw an exception which is handled by handleInitializationException().
        // If not overridden handleInitializationException() simply skips this exception.
        throw new InitializationException("Missing component parameter " + CONFIG_FILE + " - it has to define the path to the elevation configuration file", InitializationExceptionCause.NO_CONFIG_FILE_DEFINED);
      }
      boolean configFileExists = false;
      ElevationProvider elevationProvider = NO_OP_ELEVATION_PROVIDER;

      // check if using ZooKeeper
      ZkController zkController = core.getCoreContainer().getZkController();
      if (zkController != null) {
        // TODO : shouldn't have to keep reading the config name when it has been read before
        configFileExists = zkController.configFileExists(zkController.getZkStateReader().readConfigName(core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), configFileName);
      } else {
        File fC = new File(core.getResourceLoader().getConfigDir(), configFileName);
        File fD = new File(core.getDataDir(), configFileName);
        // Probe each location exactly once so that every decision below relies on the same snapshot
        // of the file system.
        final boolean inConfigDir = fC.exists();
        final boolean inDataDir = fD.exists();
        if (inConfigDir == inDataDir) {
          InitializationException e = new InitializationException("Missing config file \"" + configFileName + "\" - either " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both", InitializationExceptionCause.MISSING_CONFIG_FILE);
          elevationProvider = handleConfigLoadingException(e, true);
          elevationProviderCache.put(null, elevationProvider);
        } else if (inConfigDir) {
          if (fC.length() == 0) {
            InitializationException e = new InitializationException("Empty config file \"" + configFileName + "\" - " + fC.getAbsolutePath(), InitializationExceptionCause.EMPTY_CONFIG_FILE);
            elevationProvider = handleConfigLoadingException(e, true);
          } else {
            configFileExists = true;
            log.info("Loading QueryElevation from: {}", fC.getAbsolutePath());
            XmlConfigFile cfg = new XmlConfigFile(core.getResourceLoader(), configFileName);
            elevationProvider = loadElevationProvider(cfg);
          }
          elevationProviderCache.put(null, elevationProvider);
        }
      }
      //in other words, we think this is in the data dir, not the conf dir
      if (!configFileExists) {
        // preload the first data
        RefCounted<SolrIndexSearcher> searchHolder = null;
        try {
          searchHolder = core.getNewestSearcher(false);
          if (searchHolder == null) {
            elevationProvider = NO_OP_ELEVATION_PROVIDER;
          } else {
            IndexReader reader = searchHolder.get().getIndexReader();
            elevationProvider = getElevationProvider(reader, core);
          }
        } finally {
          // Always release the searcher reference acquired above.
          if (searchHolder != null) searchHolder.decref();
        }
      }
      return elevationProvider.size();
    }
  }

  /**
   * Reacts to an exception raised while this component was being initialized.
   * <p>
   * When this method returns normally the component stays uninitialized ({@link #initialized} is
   * {@code false}) and is silently muted. This implementation only does so when no configuration
   * file has been defined; every other cause is rethrown wrapped in a {@link SolrException}.
   *
   * @param exception The exception caught during initialization.
   * @param cause     The classification of the failure.
   */
  protected void handleInitializationException(Exception exception, InitializationExceptionCause cause) {
    if (cause == InitializationExceptionCause.NO_CONFIG_FILE_DEFINED) {
      return;
    }
    final String message = "Error initializing " + QueryElevationComponent.class.getSimpleName();
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, exception);
  }

  /**
   * Handles an exception that occurred while loading the configuration resource.
   * <p>
   * This implementation never absorbs the exception: it rethrows it unchanged.
   *
   * @param <E>                 The type of the exception to handle; declaring it on the method lets the
   *                            same exception type be rethrown to the caller.
   * @param e                   The exception caught.
   * @param resourceAccessIssue true if the exception has been thrown
   *                            because the resource could not be accessed (missing or cannot be read)
   *                            or the config file is empty; false if the resource has
   *                            been found and accessed but the error occurred while loading the resource
   *                            (invalid format, incomplete or corrupted).
   * @return The {@link ElevationProvider} to use if the exception is absorbed. If {@code null}
   *         is returned, the {@link #NO_OP_ELEVATION_PROVIDER} is used but not cached in
   *         the {@link ElevationProvider} cache.
   * @throws E If the exception is not absorbed.
   */
  protected <E extends Exception> ElevationProvider handleConfigLoadingException(E e, boolean resourceAccessIssue) throws E {
    throw e;
  }

  /**
   * Returns the {@link ElevationProvider} to use for the given reader, loading it from the data dir
   * when it is not cached yet.
   * <p>
   * Lookup order: the entry cached under the {@code null} key, then the entry cached for
   * {@code reader}, then a fresh load. A loading failure is passed to
   * {@link #handleConfigLoadingException(Exception, boolean)}; when that handler returns {@code null},
   * the {@link #NO_OP_ELEVATION_PROVIDER} is returned without being cached.
   *
   * @param reader The reader used as cache key.
   * @param core   The core from which the configuration is loaded.
   * @return The cached or loaded {@link ElevationProvider}.
   * @throws java.io.IOException                  If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries.
   * @throws org.xml.sax.SAXException                 If the configuration resource is not a valid XML content.
   * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration.
   * @throws RuntimeException             If the configuration resource is not an XML content of the expected format
   *                                      (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}).
   */
  @VisibleForTesting
  ElevationProvider getElevationProvider(IndexReader reader, SolrCore core) throws Exception {
    synchronized (elevationProviderCache) {
      final ElevationProvider configDirProvider = elevationProviderCache.get(null);
      if (configDirProvider != null) {
        return configDirProvider;
      }

      final ElevationProvider cachedForReader = elevationProviderCache.get(reader);
      if (cachedForReader != null) {
        return cachedForReader;
      }

      ElevationProvider loadedProvider;
      try {
        loadedProvider = loadElevationProvider(core);
      } catch (Exception loadingException) {
        // Only an I/O failure is reported as a resource access issue.
        final boolean resourceAccessIssue = loadingException instanceof IOException;
        loadedProvider = handleConfigLoadingException(loadingException, resourceAccessIssue);
        if (loadedProvider == null) {
          // The handler absorbed the failure without a replacement: fall back, but do not cache.
          return NO_OP_ELEVATION_PROVIDER;
        }
      }
      elevationProviderCache.put(reader, loadedProvider);
      assert loadedProvider != null;
      return loadedProvider;
    }
  }

  /**
   * Loads the {@link ElevationProvider} from the data dir.
   * <p>
   * With ZooKeeper the configuration is opened by name through the resource loader. Otherwise the
   * stream returned by {@link VersionedFile#getLatestFile} for the data dir is parsed, and that stream
   * is always closed by this method once the provider is built or loading failed.
   *
   * @param core The core providing the resource loader and the data dir.
   * @return The loaded {@link ElevationProvider}.
   * @throws java.io.IOException                  If the configuration resource cannot be found, or if an I/O error occurs while analyzing the triggering queries.
   * @throws org.xml.sax.SAXException                 If the configuration resource is not a valid XML content.
   * @throws javax.xml.parsers.ParserConfigurationException If the configuration resource is not a valid XML configuration.
   * @throws RuntimeException             If the configuration resource is not an XML content of the expected format
   *                                      (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}).
   */
  private ElevationProvider loadElevationProvider(SolrCore core) throws IOException, SAXException, ParserConfigurationException {
    String configFileName = initArgs.get(CONFIG_FILE);
    if (configFileName == null) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "QueryElevationComponent must specify argument: " + CONFIG_FILE);
    }
    log.info("Loading QueryElevation from data dir: {}", configFileName);

    ElevationProvider elevationProvider;
    ZkController zkController = core.getCoreContainer().getZkController();
    if (zkController != null) {
      XmlConfigFile cfg = new XmlConfigFile(core.getResourceLoader(), configFileName, null, null);
      elevationProvider = loadElevationProvider(cfg);
    } else {
      // The stream stays open until the provider is fully built, so this does not depend on when
      // XmlConfigFile actually reads it; try-with-resources then releases it on every path.
      try (InputStream is = VersionedFile.getLatestFile(core.getDataDir(), configFileName)) {
        XmlConfigFile cfg = new XmlConfigFile(core.getResourceLoader(), configFileName, new InputSource(is), null);
        elevationProvider = loadElevationProvider(cfg);
      }
    }
    assert elevationProvider != null;
    return elevationProvider;
  }

  /**
   * Loads the {@link ElevationProvider}.
   * <p>
   * Each {@code elevate/query} node defines a triggering query through its {@code text} attribute and
   * an optional {@code match} attribute. Each of its {@code doc} children contributes its {@code id}
   * either to the excluded ids (when the {@code exclude} attribute parses as {@code true}) or to the
   * elevated ids. Query nodes without any {@code doc} child are skipped.
   *
   * @param config The parsed elevation configuration.
   * @return The provider created by {@link #createElevationProvider(Map)} from the parsed rules.
   * @throws RuntimeException If the config does not provide an XML content of the expected format
   *                          (either {@link RuntimeException} or {@link org.apache.solr.common.SolrException}).
   */
  protected ElevationProvider loadElevationProvider(XmlConfigFile config) {
    // Insertion-ordered, so rules keep the order in which they appear in the configuration.
    Map<ElevatingQuery, ElevationBuilder> elevationBuilderMap = new LinkedHashMap<>();
    XPath xpath = XPathFactory.newInstance().newXPath();
    NodeList nodes = (NodeList) config.evaluate("elevate/query", XPathConstants.NODESET);
    for (int i = 0; i < nodes.getLength(); i++) {
      Node node = nodes.item(i);
      String queryString = DOMUtil.getAttr(node, "text", "missing query 'text'");
      String matchString = DOMUtil.getAttr(node, "match");
      ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, isSubsetMatchPolicy(matchString));

      NodeList children;
      try {
        children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET);
      } catch (XPathExpressionException e) {
        // Keep the XPath failure as the cause so that the root problem is not lost.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "query requires '<doc .../>' child", e);
      }

      if (children.getLength() == 0) { // weird
        continue;
      }
      ElevationBuilder elevationBuilder = new ElevationBuilder();
      for (int j = 0; j < children.getLength(); j++) {
        Node child = children.item(j);
        String id = DOMUtil.getAttr(child, "id", "missing 'id'");
        String excludeAttr = DOMUtil.getAttr(child, EXCLUDE, null);
        // parseBoolean() yields false for a null (absent) attribute, so no separate null check is needed.
        if (Boolean.parseBoolean(excludeAttr)) {
          elevationBuilder.addExcludedIds(Collections.singleton(id));
        } else {
          elevationBuilder.addElevatedIds(Collections.singletonList(id));
        }
      }

      // It is allowed to define multiple times different elevations for the same query. In this case the elevations
      // are merged in the ElevationBuilder (they will be triggered at the same time).
      ElevationBuilder previousElevationBuilder = elevationBuilderMap.get(elevatingQuery);
      if (previousElevationBuilder == null) {
        elevationBuilderMap.put(elevatingQuery, elevationBuilder);
      } else {
        previousElevationBuilder.merge(elevationBuilder);
      }
    }
    return createElevationProvider(elevationBuilderMap);
  }

  /**
   * Interprets the value of the {@code match} attribute of a query rule.
   *
   * @param matchString The attribute value, compared case-insensitively; may be {@code null}.
   * @return {@code true} for "subset", {@code false} for "exact", and the default policy when the
   *         value is {@code null}.
   * @throws SolrException If the value is neither {@code null}, "exact" nor "subset".
   */
  protected boolean isSubsetMatchPolicy(String matchString) {
    if (matchString == null) {
      return DEFAULT_SUBSET_MATCH;
    }
    final boolean subset = "subset".equalsIgnoreCase(matchString);
    if (subset || "exact".equalsIgnoreCase(matchString)) {
      return subset;
    }
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "invalid value \"" + matchString + "\" for query match attribute");
  }

  //---------------------------------------------------------------------------------
  // SearchComponent
  //---------------------------------------------------------------------------------

  /**
   * Applies the elevation matching the request, if any, by rewriting the query and the sort of the
   * response builder. Does nothing when this component is not initialized or when elevation is
   * disabled through {@link QueryElevationParams#ENABLE}.
   * <p>
   * Debug information is added when both debug flags of the response builder are set, whether or
   * not an elevation was found.
   */
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    if (!initialized) {
      return;
    }
    final boolean elevationEnabled = rb.req.getParams().getBool(QueryElevationParams.ENABLE, true);
    if (!elevationEnabled) {
      return;
    }

    final Elevation matchedElevation = getElevation(rb);
    if (matchedElevation != null) {
      setQuery(rb, matchedElevation);
      setSort(rb, matchedElevation);
    }

    final boolean debugRequested = rb.isDebug() && rb.isDebugQuery();
    if (debugRequested) {
      addDebugInfo(rb, matchedElevation);
    }
  }

  /**
   * Intentionally empty: this component does all of its work in
   * {@link #prepare(ResponseBuilder)}, where the query and the sort are rewritten.
   */
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    // Do nothing -- the real work is modifying the input query
  }

  /**
   * Determines the {@link Elevation} to apply to the current request.
   * <p>
   * When the request provides {@link QueryElevationParams#IDS} and/or
   * {@link QueryElevationParams#EXCLUDE}, the elevation is built from these comma-separated id lists.
   * Otherwise it is looked up by query string in the {@link ElevationProvider} of the current reader.
   *
   * @return The elevation, or {@code null} when there is no query string or no parsed query (the
   *         provider lookup result is returned as is).
   * @throws SolrException Wrapping any exception raised while building or looking up the elevation.
   */
  protected Elevation getElevation(ResponseBuilder rb) {
    final SolrParams localParams = rb.getQparser().getLocalParams();
    final String queryString;
    if (localParams != null) {
      queryString = localParams.get(QueryParsing.V);
    } else {
      queryString = rb.getQueryString();
    }
    if (queryString == null) {
      return null;
    }
    if (rb.getQuery() == null) {
      return null;
    }

    final SolrParams requestParams = rb.req.getParams();
    final String elevatedParam = requestParams.get(QueryElevationParams.IDS);
    final String excludedParam = requestParams.get(QueryElevationParams.EXCLUDE);
    try {
      if (elevatedParam == null && excludedParam == null) {
        // No explicit ids on the request: rely on the configured rules.
        final IndexReader reader = rb.req.getSearcher().getIndexReader();
        return getElevationProvider(reader, rb.req.getCore()).getElevationForQuery(queryString);
      }
      final List<String> elevatedIds = (elevatedParam == null)
          ? Collections.emptyList()
          : StrUtils.splitSmart(elevatedParam,",", true);
      final List<String> excludedIds = (excludedParam == null)
          ? Collections.emptyList()
          : StrUtils.splitSmart(excludedParam, ",", true);
      return new ElevationBuilder().addElevatedIds(elevatedIds).addExcludedIds(excludedIds).build();
    } catch (Exception failure) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", failure);
    }
  }

  /**
   * Rewrites the query of the response builder so that the elevated documents are part of the results,
   * and publishes the elevated ids in the request context under {@link #BOOSTED}.
   * <p>
   * With {@link QueryElevationParams#EXCLUSIVE} the query becomes the zero-boosted include query alone.
   * Otherwise the original query and the zero-boosted include query are combined as optional clauses,
   * and the excluded documents are either prohibited or, with
   * {@link QueryElevationParams#MARK_EXCLUDES}, only published under {@link #EXCLUDED}.
   */
  private void setQuery(ResponseBuilder rb, Elevation elevation) {
    rb.req.getContext().put(BOOSTED, elevation.elevatedIds);

    final SolrParams requestParams = rb.req.getParams();
    final boolean exclusive = requestParams.getBool(QueryElevationParams.EXCLUSIVE, false);
    if (exclusive) {
      // Only the elevated results are wanted.
      rb.setQuery(new BoostQuery(elevation.includeQuery, 0f));
      return;
    }

    // Insert the forced documents next to the user query.
    final BooleanQuery.Builder combined = new BooleanQuery.Builder()
        .add(rb.getQuery(), BooleanClause.Occur.SHOULD)
        .add(new BoostQuery(elevation.includeQuery, 0f), BooleanClause.Occur.SHOULD);
    if (elevation.excludeQueries != null) {
      final boolean markOnly = requestParams.getBool(QueryElevationParams.MARK_EXCLUDES, false);
      if (markOnly) {
        // Items are only marked as excluded, not actually excluded.
        // This works with the EditorialMarkerFactory.
        rb.req.getContext().put(EXCLUDED, elevation.excludedIds);
      } else {
        for (TermQuery excludeQuery : elevation.excludeQueries) {
          combined.add(excludeQuery, BooleanClause.Occur.MUST_NOT);
        }
      }
    }
    rb.setQuery(combined.build());
  }

  /**
   * Adjusts the sort and the grouping sorts of the response builder so that elevated documents are
   * ranked through an {@link ElevationComparatorSource}. Does nothing when the elevation has no
   * elevated ids.
   * <p>
   * The request parameters {@link QueryElevationParams#FORCE_ELEVATION} and
   * {@link QueryElevationParams#USE_CONFIGURED_ELEVATED_ORDER} override the component-level settings.
   */
  private void setSort(ResponseBuilder rb, Elevation elevation) throws IOException {
    if (elevation.elevatedIds.isEmpty()) {
      return;
    }
    final SolrParams requestParams = rb.req.getParams();
    final boolean force = requestParams.getBool(QueryElevationParams.FORCE_ELEVATION, this.forceElevation);
    final boolean keepConfiguredOrder =
        requestParams.getBool(QueryElevationParams.USE_CONFIGURED_ELEVATED_ORDER, this.useConfiguredElevatedOrder);
    final IntIntHashMap priorityByDocId =
        getBoostDocs(rb.req.getSearcher(), elevation.elevatedIds, rb.req.getContext());
    final ElevationComparatorSource elevationComparator =
        new ElevationComparatorSource(priorityByDocId, keepConfiguredOrder);
    setSortSpec(rb, force, elevationComparator);
    setGroupingSpec(rb, force, elevationComparator);
  }

  /**
   * Installs the elevation sort in the sort specification of the response builder.
   * <p>
   * When no sort is defined, the existing specification is updated in place to sort on the elevation
   * comparator first and on score second. Otherwise the specification is replaced only if
   * {@link #modifySortSpec} produced a modified one.
   */
  private void setSortSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) {
    final SortSpec requestedSpec = rb.getSortSpec();
    if (requestedSpec.getSort() != null) {
      // An explicit sort exists: elevate only where it is based on score (or when forced).
      final SortSpec elevatedSpec = this.modifySortSpec(requestedSpec, forceElevation, comparator);
      if (elevatedSpec != null) {
        rb.setSortSpec(elevatedSpec);
      }
      return;
    }
    // No sort defined: use a custom sorting method to insert documents in their proper place.
    final SortField elevationSort = new SortField("_elevate_", comparator, true);
    final SortField scoreSort = new SortField(null, SortField.Type.SCORE, false);
    requestedSpec.setSortAndFields(
        new Sort(elevationSort, scoreSort),
        Arrays.asList(new SchemaField[2]));
  }

  /**
   * Alters the sorting in the grouping specification, if there is one: both the group sort and the
   * within-group sort are replaced when {@link #modifySortSpec} produced a modified specification.
   */
  private void setGroupingSpec(ResponseBuilder rb, boolean forceElevation, ElevationComparatorSource comparator) {
    final GroupingSpecification grouping = rb.getGroupingSpec();
    if (grouping == null) {
      return;
    }

    final SortSpec elevatedGroupSort =
        this.modifySortSpec(grouping.getGroupSortSpec(), forceElevation, comparator);
    if (elevatedGroupSort != null) {
      grouping.setGroupSortSpec(elevatedGroupSort);
    }

    final SortSpec elevatedWithinGroupSort =
        this.modifySortSpec(grouping.getWithinGroupSortSpec(), forceElevation, comparator);
    if (elevatedWithinGroupSort != null) {
      grouping.setWithinGroupSortSpec(elevatedWithinGroupSort);
    }
  }

  /**
   * Builds a copy of the given sort specification in which an elevation sort field is inserted
   * before every score-based sort field.
   * <p>
   * When {@code forceElevation} is set and the first sort field is not score-based, an elevation sort
   * field is additionally placed in front of all the others. Every inserted sort field is paired with
   * a {@code null} schema field so that both lists stay aligned.
   *
   * @param current        The sort specification to inspect; it is not modified.
   * @param forceElevation Whether elevation must apply even if the primary sort is not the score.
   * @param comparator     The comparator source used by the inserted elevation sort fields.
   * @return The new sort specification, with the same count and offset as {@code current}, or
   *         {@code null} if no elevation sort field had to be inserted.
   */
  private SortSpec modifySortSpec(SortSpec current, boolean forceElevation, ElevationComparatorSource comparator) {
    boolean modify = false;
    SortField[] currentSorts = current.getSort().getSort();
    List<SchemaField> currentFields = current.getSchemaFields();

    // Element types are spelled out so that toArray(SortField[]) below yields a SortField[].
    List<SortField> sorts = new ArrayList<>(currentSorts.length + 1);
    List<SchemaField> fields = new ArrayList<>(currentFields.size() + 1);

    // Perhaps force it to always sort by score
    if (forceElevation && currentSorts[0].getType() != SortField.Type.SCORE) {
      sorts.add(new SortField("_elevate_", comparator, true));
      fields.add(null);
      modify = true;
    }
    for (int i = 0; i < currentSorts.length; i++) {
      SortField sf = currentSorts[i];
      if (sf.getType() == SortField.Type.SCORE) {
        // The elevation sort field takes the inverse of the score sort's reverse flag.
        sorts.add(new SortField("_elevate_", comparator, !sf.getReverse()));
        fields.add(null);
        modify = true;
      }
      sorts.add(sf);
      fields.add(currentFields.get(i));
    }
    return modify ?
            new SortSpec(new Sort(sorts.toArray(new SortField[0])),
                    fields,
                    current.getCount(),
                    current.getOffset())
            : null;
  }

  /**
   * Adds a "queryBoosting" debug entry holding the query string and the terms of the include query of
   * the elevation. The "match" value is {@code null} when there is no elevation.
   */
  private void addDebugInfo(ResponseBuilder rb, Elevation elevation) {
    final List<String> elevatedTerms;
    if (elevation == null) {
      elevatedTerms = null;
    } else {
      // Extract the elevated terms into a list
      final List<BooleanClause> includeClauses = elevation.includeQuery.clauses();
      elevatedTerms = new ArrayList<>(includeClauses.size());
      for (BooleanClause includeClause : includeClauses) {
        final TermQuery termQuery = (TermQuery) includeClause.getQuery();
        elevatedTerms.add(termQuery.getTerm().text());
      }
    }

    final SimpleOrderedMap<Object> debugEntry = new SimpleOrderedMap<>();
    debugEntry.add("q", rb.getQueryString());
    debugEntry.add("match", elevatedTerms);
    rb.addDebugInfo("queryBoosting", debugEntry);
  }

  //---------------------------------------------------------------------------------
  // Boosted docs helper
  //---------------------------------------------------------------------------------

  /**
   * Resolves a set of boosted docs by uniqueKey to a map of docIds mapped to a priority value &gt; 0.
   * <p>
   * The first key of {@code boosted} gets the highest priority ({@code boosted.size()}) and the last one
   * gets priority 1. Keys that are not found in the index are skipped. When {@code boosted} is not null
   * and the context already holds a map, that map is returned as is; otherwise the computed result
   * (possibly {@code null}) is stored in the context.
   *
   * @param indexSearcher the SolrIndexSearcher; required
   * @param boosted are the set of uniqueKey values to be boosted in priority order.  If null; returns null.
   * @param context the {@link SolrQueryRequest#getContext()} or null if none.  We'll cache our results here.
   * @return the map of global docId to priority, or null if {@code boosted} is null.
   * @throws IOException if looking up a uniqueKey in the index fails.
   */
  //TODO consider simplifying to remove "boosted" arg which can be looked up in context via BOOSTED key?
  public static IntIntHashMap getBoostDocs(SolrIndexSearcher indexSearcher, Set<BytesRef> boosted, Map context) throws IOException {

    IntIntHashMap boostDocs = null;

    if (boosted != null) {

      //First see if it's already in the request context. Could have been put there by another caller.
      if (context != null) {
        boostDocs = (IntIntHashMap) context.get(BOOSTED_DOCIDS);
        if (boostDocs != null) {
          return boostDocs;
        }
      }

      //Not in the context yet so load it.
      boostDocs = new IntIntHashMap(boosted.size()); // docId to boost
      int priority = boosted.size() + 1; // the corresponding priority for each boosted key (starts at this; decrements down)
      for (BytesRef uniqueKey : boosted) {
        priority--; // therefore first == boosted.size(); last will be 1
        long segAndId = indexSearcher.lookupId(uniqueKey); // higher 32 bits == segment ID, low 32 bits == doc ID
        if (segAndId == -1) { // not found
          continue;
        }
        int seg = (int) (segAndId >> 32);
        int localDocId = (int) segAndId;
        final IndexReaderContext indexReaderContext = indexSearcher.getTopReaderContext().children().get(seg);
        // Convert the segment-local doc id into a doc id relative to the top-level reader.
        int docId = indexReaderContext.docBaseInParent + localDocId;
        boostDocs.put(docId, priority);
      }
      assert priority == 1; // the last priority (lowest)
    }

    if (context != null) {
      //noinspection unchecked
      context.put(BOOSTED_DOCIDS, boostDocs);
    }

    return boostDocs;
  }

  //---------------------------------------------------------------------------------
  // SolrInfoBean
  //---------------------------------------------------------------------------------

  /** Returns a short, fixed, human-readable description of this component. */
  @Override
  public String getDescription() {
    return "Query Boosting -- boost particular documents for a given query";
  }

  //---------------------------------------------------------------------------------
  // Overrides
  //---------------------------------------------------------------------------------

  /**
   * Creates the {@link ElevationProvider} to set during configuration loading. The same instance will be used later
   * when elevating results for queries.
   * <p>
   * This implementation returns a {@code DefaultElevationProvider} built with a new
   * {@code TrieSubsetMatcher.Builder}. It is protected so that subclasses can supply another provider.
   *
   * @param elevationBuilderMap map of all {@link ElevatingQuery} and their corresponding {@link ElevationBuilder}.
   * @return The created {@link ElevationProvider}.
   */
  protected ElevationProvider createElevationProvider(Map elevationBuilderMap) {
    return new DefaultElevationProvider(new TrieSubsetMatcher.Builder<>(), elevationBuilderMap);
  }

  //---------------------------------------------------------------------------------
  // Query analysis and tokenization
  //---------------------------------------------------------------------------------

  /**
   * Runs the query string through the query analyzer and returns the produced terms appended one
   * after the other, without any separator.
   *
   * @param query The query string to analyze.
   * @return The concatenation of the analyzed terms.
   */
  public String analyzeQuery(String query) {
    final StringBuilder joinedTerms = new StringBuilder();
    analyzeQuery(query, term -> joinedTerms.append(term));
    return joinedTerms.toString();
  }

  /**
   * Tokenizes the query string with {@link #queryAnalyzer} and hands each term to the consumer.
   * <p>
   * The consumer receives the same term attribute instance for every token, so it has to read or
   * copy its content during the call.
   *
   * @param query         The query string to analyze.
   * @param termsConsumer The consumer receiving each analyzed term.
   * @throws RuntimeException Wrapping any {@link IOException} raised by the token stream.
   */
  protected void analyzeQuery(String query, Consumer termsConsumer) {
    try (TokenStream stream = queryAnalyzer.tokenStream("", query)) {
      stream.reset();
      final CharTermAttribute currentTerm = stream.addAttribute(CharTermAttribute.class);
      for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
        termsConsumer.accept(currentTerm);
      }
      stream.end();
    } catch (IOException ioFailure) {
      throw new RuntimeException(ioFailure);
    }
  }

  //---------------------------------------------------------------------------------
  // Testing
  //---------------------------------------------------------------------------------

  /**
   * Helpful for testing without loading config.xml.
   * <p>
   * Clears the whole elevation provider cache, then registers for the given reader a provider that holds only
   * the elevation described by the arguments. Both steps run inside a single critical section on the cache, so
   * no other thread synchronizing on the cache can populate the reader's entry in between.
   *
   * @param reader      The {@link org.apache.lucene.index.IndexReader}.
   * @param queryString The query for which to elevate some documents. Any elevation previously cached is
   *                    discarded.
   * @param subsetMatch true for query subset match; false for query exact match.
   * @param elevatedIds The readable ids of the documents to set as top results for the provided query; may be null.
   * @param excludedIds The readable ids of the document to exclude from results for the provided query; may be null.
   */
  @VisibleForTesting
  void setTopQueryResults(IndexReader reader, String queryString, boolean subsetMatch,
                          String[] elevatedIds, String[] excludedIds) {
    ElevatingQuery elevatingQuery = new ElevatingQuery(queryString, subsetMatch);
    ElevationBuilder elevationBuilder = new ElevationBuilder();
    elevationBuilder.addElevatedIds(elevatedIds == null ? Collections.emptyList() : Arrays.asList(elevatedIds));
    elevationBuilder.addExcludedIds(excludedIds == null ? Collections.emptyList() : Arrays.asList(excludedIds));
    Map<ElevatingQuery, ElevationBuilder> elevationBuilderMap = ImmutableMap.of(elevatingQuery, elevationBuilder);
    synchronized (elevationProviderCache) {
      // Java monitors are reentrant, so the nested synchronized block in clearElevationProviderCache() is safe.
      clearElevationProviderCache();
      elevationProviderCache.computeIfAbsent(reader, k -> createElevationProvider(elevationBuilderMap));
    }
  }

  /**
   * Removes every entry from the elevation provider cache, while holding the cache's monitor.
   */
  @VisibleForTesting
  void clearElevationProviderCache() {
    synchronized (elevationProviderCache) {
      elevationProviderCache.clear();
    }
  }

  //---------------------------------------------------------------------------------
  // Exception
  //---------------------------------------------------------------------------------

  /**
   * Checked exception carrying, in addition to its message, an {@link InitializationExceptionCause}
   * that classifies the failure.
   */
  private static class InitializationException extends Exception {

    /** Classification of the failure, as given at construction time. */
    private final InitializationExceptionCause exceptionCause;

    /**
     * @param message Detail message, forwarded to {@link Exception#Exception(String)}.
     * @param cause   Classification of the failure.
     */
    InitializationException(String message, InitializationExceptionCause cause) {
      super(message);
      exceptionCause = cause;
    }
  }

  /**
   * Classifies the reason carried by an {@link InitializationException}.
   */
  protected enum InitializationExceptionCause {
    /**
     * The component parameter {@link #FIELD_TYPE} defines an unknown field type.
     */
    UNKNOWN_FIELD_TYPE,
    /**
     * This component requires the schema to have a uniqueKeyField, which it does not have.
     */
    MISSING_UNIQUE_KEY_FIELD,
    /**
     * Missing component parameter {@link #CONFIG_FILE} - it has to define the path to the elevation configuration file (e.g. elevate.xml).
     */
    NO_CONFIG_FILE_DEFINED,
    /**
     * The elevation configuration file (e.g. elevate.xml) cannot be found, or is defined in both conf/ and data/ directories.
     */
    MISSING_CONFIG_FILE,
    /**
     * The elevation configuration file (e.g. elevate.xml) is empty.
     */
    EMPTY_CONFIG_FILE,
    /**
     * Unclassified exception cause.
     */
    OTHER,
  }

  //---------------------------------------------------------------------------------
  // Elevation classes
  //---------------------------------------------------------------------------------

  /**
   * Provides the elevations defined for queries.
   */
  protected interface ElevationProvider {
    /**
     * Gets the elevation associated to the provided query.
     * 

* By contract and by design, only one elevation may be associated * to a given query (this can be safely verified by an assertion). * * @param queryString The query string (not {@link #analyzeQuery(String) analyzed} yet, * this {@link ElevationProvider} is in charge of analyzing it). * @return The elevation associated with the query; or null if none. */ Elevation getElevationForQuery(String queryString); /** * Gets the number of query elevations in this {@link ElevationProvider}. */ @VisibleForTesting int size(); } /** * {@link ElevationProvider} that returns no elevation. */ @SuppressWarnings("WeakerAccess") protected static final ElevationProvider NO_OP_ELEVATION_PROVIDER = new ElevationProvider() { @Override public Elevation getElevationForQuery(String queryString) { return null; } @Override public int size() { return 0; } }; /** * Provides elevations with either: *

    *
  • subset match - all the elevating terms are matched in the search query, in any order.
  • *
  • exact match - the elevating query matches fully (all terms in same order) the search query.
  • *
* The terms are tokenized with the query analyzer. */ protected class DefaultElevationProvider implements ElevationProvider { private final TrieSubsetMatcher subsetMatcher; private final Map exactMatchElevationMap; /** * @param subsetMatcherBuilder The {@link TrieSubsetMatcher.Builder} to build the {@link TrieSubsetMatcher}. * @param elevationBuilderMap The map of elevation rules. */ protected DefaultElevationProvider(TrieSubsetMatcher.Builder subsetMatcherBuilder, Map elevationBuilderMap) { exactMatchElevationMap = new LinkedHashMap<>(); Collection queryTerms = new ArrayList<>(); Consumer termsConsumer = term -> queryTerms.add(term.toString()); StringBuilder concatTerms = new StringBuilder(); Consumer concatConsumer = concatTerms::append; for (Map.Entry entry : elevationBuilderMap.entrySet()) { ElevatingQuery elevatingQuery = entry.getKey(); Elevation elevation = entry.getValue().build(); if (elevatingQuery.subsetMatch) { queryTerms.clear(); analyzeQuery(elevatingQuery.queryString, termsConsumer); subsetMatcherBuilder.addSubset(queryTerms, elevation); } else { concatTerms.setLength(0); analyzeQuery(elevatingQuery.queryString, concatConsumer); exactMatchElevationMap.put(concatTerms.toString(), elevation); } } this.subsetMatcher = subsetMatcherBuilder.build(); } @Override public Elevation getElevationForQuery(String queryString) { boolean hasExactMatchElevationRules = exactMatchElevationMap.size() != 0; if (subsetMatcher.getSubsetCount() == 0) { if (!hasExactMatchElevationRules) { return null; } return exactMatchElevationMap.get(analyzeQuery(queryString)); } Collection queryTerms = new ArrayList<>(); Consumer termsConsumer = term -> queryTerms.add(term.toString()); StringBuilder concatTerms = null; if (hasExactMatchElevationRules) { concatTerms = new StringBuilder(); termsConsumer = termsConsumer.andThen(concatTerms::append); } analyzeQuery(queryString, termsConsumer); Elevation mergedElevation = null; if (hasExactMatchElevationRules) { mergedElevation = 
exactMatchElevationMap.get(concatTerms.toString()); } Iterator elevationIterator = subsetMatcher.findSubsetsMatching(queryTerms); while (elevationIterator.hasNext()) { Elevation elevation = elevationIterator.next(); mergedElevation = mergedElevation == null ? elevation : mergedElevation.mergeWith(elevation); } return mergedElevation; } @Override public int size() { return exactMatchElevationMap.size() + subsetMatcher.getSubsetCount(); } } /** * Query triggering elevation. */ @SuppressWarnings("WeakerAccess") protected static class ElevatingQuery { public final String queryString; public final boolean subsetMatch; /** * @param queryString The query to elevate documents for (not the analyzed form). * @param subsetMatch Whether to match a subset of query terms. */ protected ElevatingQuery(String queryString, boolean subsetMatch) { this.queryString = queryString; this.subsetMatch = subsetMatch; } @Override public boolean equals(Object o) { if (!(o instanceof ElevatingQuery)) { return false; } ElevatingQuery eq = (ElevatingQuery) o; return queryString.equals(eq.queryString) && subsetMatch == eq.subsetMatch; } @Override public int hashCode() { return queryString.hashCode() + (subsetMatch ? 1 : 0); } } /** * Builds an {@link Elevation}. This class is used to start defining query elevations, but allowing the merge of * multiple elevations for the same query. */ @SuppressWarnings("WeakerAccess") public class ElevationBuilder { /** * The ids of the elevated documents that should appear on top of search results; can be null. * The order is retained. */ private LinkedHashSet elevatedIds; /** * The ids of the excluded documents that should not appear in search results; can be null. 
*/ private Set excludedIds; // for temporary/transient use when adding an elevated or excluded ID private final BytesRefBuilder scratch = new BytesRefBuilder(); public ElevationBuilder addElevatedIds(List ids) { if (elevatedIds == null) { elevatedIds = new LinkedHashSet<>(Math.max(10, ids.size())); } for (String id : ids) { elevatedIds.add(toBytesRef(id)); } return this; } public ElevationBuilder addExcludedIds(Collection ids) { if (excludedIds == null) { excludedIds = new HashSet<>(Math.max(10, ids.size())); } for (String id : ids) { excludedIds.add(toBytesRef(id)); } return this; } public BytesRef toBytesRef(String id) { uniqueKeyField.getType().readableToIndexed(id, scratch); return scratch.toBytesRef(); } public ElevationBuilder merge(ElevationBuilder elevationBuilder) { if (elevatedIds == null) { elevatedIds = elevationBuilder.elevatedIds; } else if (elevationBuilder.elevatedIds != null) { elevatedIds.addAll(elevationBuilder.elevatedIds); } if (excludedIds == null) { excludedIds = elevationBuilder.excludedIds; } else if (elevationBuilder.excludedIds != null) { excludedIds.addAll(elevationBuilder.excludedIds); } return this; } public Elevation build() { return new Elevation(elevatedIds, excludedIds, uniqueKeyField.getName()); } } /** * Elevation of some documents in search results, with potential exclusion of others. * Immutable. */ protected static class Elevation { private static final BooleanQuery EMPTY_QUERY = new BooleanQuery.Builder().build(); public final Set elevatedIds; // in configured order; not null public final BooleanQuery includeQuery; // not null public final Set excludedIds; // not null //just keep the term query, b/c we will not always explicitly exclude the item based on markExcludes query time param public final TermQuery[] excludeQueries; //may be null /** * Constructs an elevation. * * @param elevatedIds The ids of the elevated documents that should appear on top of search results, in configured order; * can be null. 
* @param excludedIds The ids of the excluded documents that should not appear in search results; can be null. * @param queryFieldName The field name to use to create query terms. */ public Elevation(Set elevatedIds, Set excludedIds, String queryFieldName) { if (elevatedIds == null || elevatedIds.isEmpty()) { includeQuery = EMPTY_QUERY; this.elevatedIds = Collections.emptySet(); } else { this.elevatedIds = ImmutableSet.copyOf(elevatedIds); BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); for (BytesRef elevatedId : elevatedIds) { includeQueryBuilder.add(new TermQuery(new Term(queryFieldName, elevatedId)), BooleanClause.Occur.SHOULD); } includeQuery = includeQueryBuilder.build(); } if (excludedIds == null || excludedIds.isEmpty()) { this.excludedIds = Collections.emptySet(); excludeQueries = null; } else { this.excludedIds = ImmutableSet.copyOf(excludedIds); List excludeQueriesBuilder = new ArrayList<>(excludedIds.size()); for (BytesRef excludedId : excludedIds) { excludeQueriesBuilder.add(new TermQuery(new Term(queryFieldName, excludedId))); } excludeQueries = excludeQueriesBuilder.toArray(new TermQuery[0]); } } protected Elevation(Set elevatedIds, BooleanQuery includeQuery, Set excludedIds, TermQuery[] excludeQueries) { this.elevatedIds = elevatedIds; this.includeQuery = includeQuery; this.excludedIds = excludedIds; this.excludeQueries = excludeQueries; } /** * Merges this {@link Elevation} with another and creates a new {@link Elevation}. * @return A new instance containing the merging of the two elevations; or directly this elevation if the other * is null. 
*/ protected Elevation mergeWith(Elevation elevation) { if (elevation == null) { return this; } Set elevatedIds = ImmutableSet.builder().addAll(this.elevatedIds).addAll(elevation.elevatedIds).build(); boolean overlappingElevatedIds = elevatedIds.size() != (this.elevatedIds.size() + elevation.elevatedIds.size()); BooleanQuery.Builder includeQueryBuilder = new BooleanQuery.Builder(); Set clauseSet = (overlappingElevatedIds ? Sets.newHashSetWithExpectedSize(elevatedIds.size()) : null); for (BooleanClause clause : this.includeQuery.clauses()) { if (!overlappingElevatedIds || clauseSet.add(clause)) { includeQueryBuilder.add(clause); } } for (BooleanClause clause : elevation.includeQuery.clauses()) { if (!overlappingElevatedIds || clauseSet.add(clause)) { includeQueryBuilder.add(clause); } } Set excludedIds = ImmutableSet.builder().addAll(this.excludedIds).addAll(elevation.excludedIds).build(); TermQuery[] excludeQueries; if (this.excludeQueries == null) { excludeQueries = elevation.excludeQueries; } else if (elevation.excludeQueries == null) { excludeQueries = this.excludeQueries; } else { boolean overlappingExcludedIds = excludedIds.size() != (this.excludedIds.size() + elevation.excludedIds.size()); if (overlappingExcludedIds) { excludeQueries = ImmutableSet.builder().add(this.excludeQueries).add(elevation.excludeQueries) .build().toArray(new TermQuery[0]); } else { excludeQueries = ObjectArrays.concat(this.excludeQueries, elevation.excludeQueries, TermQuery.class); } } return new Elevation(elevatedIds, includeQueryBuilder.build(), excludedIds, excludeQueries); } @Override public String toString() { return "{elevatedIds=" + Collections2.transform(elevatedIds, BytesRef::utf8ToString) + ", excludedIds=" + Collections2.transform(excludedIds, BytesRef::utf8ToString) + "}"; } } /** Elevates certain docs to the top. 
*/ private class ElevationComparatorSource extends FieldComparatorSource { private final IntIntHashMap elevatedWithPriority; private final boolean useConfiguredElevatedOrder; private final int[] sortedElevatedDocIds; private ElevationComparatorSource(IntIntHashMap elevatedWithPriority, boolean useConfiguredElevatedOrder) { this.elevatedWithPriority = elevatedWithPriority; this.useConfiguredElevatedOrder = useConfiguredElevatedOrder; // copy elevatedWithPriority keys (doc IDs) into sortedElevatedDocIds, sorted sortedElevatedDocIds = new int[elevatedWithPriority.size()]; final Iterator iterator = elevatedWithPriority.iterator(); for (int i = 0; i < sortedElevatedDocIds.length; i++) { IntIntCursor next = iterator.next(); sortedElevatedDocIds[i] = next.key; } assert iterator.hasNext() == false; Arrays.sort(sortedElevatedDocIds); } @Override public FieldComparator newComparator(String fieldName, final int numHits, int sortPos, boolean reversed) { return new SimpleFieldComparator() { final int[] values = new int[numHits]; int bottomVal; int topVal; int docBase; boolean hasElevatedDocsThisSegment; @Override protected void doSetNextReader(LeafReaderContext context) { docBase = context.docBase; // ascertain if hasElevatedDocsThisSegment final int idx = Arrays.binarySearch(sortedElevatedDocIds, docBase); if (idx < 0) { //first doc in segment isn't elevated (typical). Maybe another is? 
int nextIdx = -idx - 1; if (nextIdx < sortedElevatedDocIds.length) { int nextElevatedDocId = sortedElevatedDocIds[nextIdx]; if (nextElevatedDocId > docBase + context.reader().maxDoc()) { hasElevatedDocsThisSegment = false; return; } } } hasElevatedDocsThisSegment = true; } @Override public int compare(int slot1, int slot2) { return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern } @Override public void setBottom(int slot) { bottomVal = values[slot]; } @Override public void setTopValue(Integer value) { topVal = value; } private int docVal(int doc) { if (!hasElevatedDocsThisSegment) { assert elevatedWithPriority.containsKey(docBase + doc) == false; return -1; } else if (useConfiguredElevatedOrder) { return elevatedWithPriority.getOrDefault(docBase + doc, -1); } else { return elevatedWithPriority.containsKey(docBase + doc) ? 1 : -1; } } @Override public int compareBottom(int doc) { return bottomVal - docVal(doc); } @Override public void copy(int slot, int doc) { values[slot] = docVal(doc); } @Override public Integer value(int slot) { return values[slot]; } @Override public int compareTop(int doc) { final int docValue = docVal(doc); return topVal - docValue; // values will be small enough that there is no overflow concern } }; } } /** * Matches a potentially large collection of subsets with a trie implementation. *

* Given a collection of subsets N, finds all the subsets that are contained (ignoring duplicate elements) * by a provided set s. * That is, finds all subsets n in N for which s.containsAll(n) * (s contains all the elements of n, in any order). *

* Associates a match value of type <M> to each subset and provides it each time the subset matches (i.e. is * contained by the provided set). *

* This matcher imposes the elements are {@link Comparable}. * It does not keep the subset insertion order. * Duplicate subsets stack their match values. *

* The time complexity of adding a subset is O(n.log(n)), where n is the size of the subset. *

* The worst case time complexity of the subset matching is O(2^s), however a more typical case time * complexity is O(s^3) where s is the size of the set to partially match. * Note it does not depend on N, the size of the collection of subsets, nor on n, the size of * a subset. * * @param Subset element type. * @param Subset match value type. */ protected static class TrieSubsetMatcher, M> { /* Trie structure: --------------- - A subset element on each edge. - Each node may contain zero or more match values. Sample construction: -------------------- - given the subsets "B A C", "A B", "A B A", "B", "D B". - remove duplicates and sort each subset => "A B C", "A B", "A B", "B", "B D". - N() means a node with no match value. - N(x, y) means a node with 2 match values x and y. root --A--> N() --B--> N("A B", "A B A") --C--> N("B A C") --B--> N("B") --D--> N("D B") Subset matching algorithm: -------------------------- - given a set s In the above sample, with s="A B C B", then the matching subsets are "B A C", "A B", "A B A", "B" remove duplicates in s sort s keep a queue Q of current nodes Add root node to Q Another queue Q' will hold the child nodes (initially empty) for each element e in s { for each current node in Q { if current node has a child for edge e { add the child to Q' record the child match values } if e is greater than or equal to current node greatest edge { remove current node from Q (as we are sure this current node children cannot match anymore) } } Move all child nodes from Q' to Q } Time complexity: ---------------- s = size of the set to partially match N = size of the collection of subsets n = size of a subset The time complexity depends on the number of current nodes in Q. 
The worst case time complexity: For a given set s: - initially Q contains only 1 current node, the root => 1 node - for first element e1 in s, at most 1 node is added to Q => 2 nodes - for element e2 in s, at most 2 new nodes are added to Q => 4 nodes - for element e3 in s, at most 4 new nodes are added to Q => 8 nodes - for element ek in s, at most 2^(k-1) new nodes are added to Q => 2^k nodes - however there are, in worst case, a maximum of N.n nodes Sum[k=0 to s](2^k) = 2^(s+1)-1 So the worst case time complexity is: min(O(2^s), O(s.N.n)) A more typical case time complexity: For a given set s: - initially Q contains only 1 current node, the root => 1 node - for first element e1 in s, 1 node is added to Q => 2 nodes - for element e2 in s, 2 new nodes are added to Q => 4 nodes - for element e3 in s, 3 new nodes are added to Q => 7 nodes - for element ek in s, k new nodes are added to Q => previous nodes + k : q(k) = q(k-1) + k Solution is q(k) = 1/2 (k^2+k+2) Sum[k=0 to s](k^2+k+2)/2 = 1/6 (s+1) (s^2+2s+6) So a more typical case time complexity is: min(O(s^3), O(s.N.n)) */ public static class Builder, M> { private final TrieSubsetMatcher.Node root = new TrieSubsetMatcher.Node<>(); private int subsetCount; /** * Adds a subset. If the subset is already registered, the new match value is added to the previous one(s). * * @param subset The subset of {@link Comparable} elements; it is copied. It is ignored if its size is 0. * Any subset added is guaranteed to be returned by {@link TrieSubsetMatcher#findSubsetsMatching} * if it matches (i.e. is contained), even if two or more subsets are equal, or equal when ignoring * duplicate elements. * @param matchValue The match value provided each time the subset matches. * @return This builder. 
*/ public Builder addSubset(Collection subset, M matchValue) { if (!subset.isEmpty()) { TrieSubsetMatcher.Node node = root; for (E e : ImmutableSortedSet.copyOf(subset)) { node = node.getOrCreateChild(e); } node.addMatchValue(matchValue); subsetCount++; } return this; } public TrieSubsetMatcher build() { root.trimAndMakeImmutable(); return new TrieSubsetMatcher<>(root, subsetCount); } } private final Node root; private final int subsetCount; private TrieSubsetMatcher(Node root, int subsetCount) { this.root = root; this.subsetCount = subsetCount; } /** * Gets the number of subsets in this matcher. */ public int getSubsetCount() { return subsetCount; } /** * Returns an iterator over all the subsets that are contained by the provided set. * The returned iterator does not support removal. * * @param set This set is copied to a new {@link ImmutableSortedSet} with natural ordering. */ public Iterator findSubsetsMatching(Collection set) { return new MatchIterator(ImmutableSortedSet.copyOf(set)); } /** * Trie node. */ private static class Node, M> { private Map> children; private E greatestEdge; private List matchValues; /** * Gets the child node for the provided element; or null if none. */ Node getChild(E e) { return (children == null ? null : children.get(e)); } /** * Gets the child node for the provided element, or creates it if it does not exist. */ Node getOrCreateChild(E e) { if (children == null) { children = new HashMap<>(4); } Node child = children.get(e); if (child == null) { child = new Node<>(); children.put(e, child); if (greatestEdge == null || e.compareTo(greatestEdge) > 0) { greatestEdge = e; } } return child; } /** * Indicates whether this node has more children for edges greater than the given element. * * @return true if this node has more children for edges greater than the given element; * false otherwise. 
*/ boolean hasMorePotentialChildren(E e) { return greatestEdge != null && e.compareTo(greatestEdge) < 0; } /** * Decorates this node with an additional match value. */ void addMatchValue(M matchValue) { if (matchValues == null) { matchValues = new ArrayList<>(1); } matchValues.add(matchValue); } /** * Gets the match values decorating this node. */ List getMatchValues() { return (matchValues == null ? Collections.emptyList() : matchValues); } /** * Trims and makes this node, as well as all descendant nodes, immutable. * This may reduce its memory usage and make it more efficient. */ void trimAndMakeImmutable() { if (children != null && !(children instanceof ImmutableMap)) { for (Node child : children.values()) child.trimAndMakeImmutable(); children = ImmutableMap.copyOf(children); } if (matchValues != null && !(matchValues instanceof ImmutableList)) { matchValues = ImmutableList.copyOf(matchValues); } } } private class MatchIterator implements Iterator { private final Iterator sortedSetIterator; private final Queue> currentNodes; private final Queue nextMatchValues; MatchIterator(SortedSet set) { sortedSetIterator = set.iterator(); currentNodes = new ArrayDeque<>(); currentNodes.offer(root); nextMatchValues = new ArrayDeque<>(); } @Override public boolean hasNext() { return !nextMatchValues.isEmpty() || nextSubsetMatch(); } @Override public M next() { if (!hasNext()) { throw new NoSuchElementException(); } assert !nextMatchValues.isEmpty(); return nextMatchValues.poll(); } @Override public void remove() { throw new UnsupportedOperationException(); } private boolean nextSubsetMatch() { while (sortedSetIterator.hasNext()) { E e = sortedSetIterator.next(); int currentNodeCount = currentNodes.size(); for (int i = 0; i < currentNodeCount; i++) { TrieSubsetMatcher.Node currentNode = currentNodes.remove(); TrieSubsetMatcher.Node child = currentNode.getChild(e); if (child != null) { currentNodes.offer(child); nextMatchValues.addAll(child.getMatchValues()); } if 
(currentNode.hasMorePotentialChildren(e)) { currentNodes.offer(currentNode); } } if (!nextMatchValues.isEmpty()) { return true; } } return false; } } } }