// org.apache.solr.handler.component.ExpandComponent Maven / Gradle / Ivy

// Show more of this group Show more artifacts with this name
// Show all versions of solr-core Show documentation
// Apache Solr (module: core)
// There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ExpandParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.CollapsingQParserPlugin;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryLimits;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.SolrResponseUtil;
import org.apache.solr.util.plugin.PluginInfoInitialized;

/**
 * The ExpandComponent is designed to work with the CollapsingPostFilter. The CollapsingPostFilter
 * collapses a result set on a field.
 *
 * <p>The ExpandComponent expands the collapsed groups for a single page. When multiple collapse
 * groups are specified, the field is taken from the collapse group with the lowest cost. If the
 * costs are equal, the field is taken from the first collapse group.
 *
 * <p>http parameters:
 *
 * <p>expand=true <br>
 * expand.rows=5 <br>
 * expand.sort=field asc|desc <br>
 * expand.q=*:* (optional, overrides the main query) <br>
 * expand.fq=type:child (optional, overrides the main filter queries) <br>
 * expand.field=field (mandatory if not used with the CollapsingQParserPlugin; this is given
 * higher priority when both are present) <br>
 */
public class ExpandComponent extends SearchComponent implements PluginInfoInitialized {
  /** Name of this component; also used as the request parameter name in modifyRequest. */
  public static final String COMPONENT_NAME = "expand";

  /** Stage that distributedProcess asks for while an expand request has not yet reached it. */
  private static final int finishingStage = ResponseBuilder.STAGE_GET_FIELDS;

  /** Plugin configuration; starts as the empty info and is replaced by init(PluginInfo). */
  private PluginInfo info = PluginInfo.EMPTY_INFO;

  /**
   * Stores the plugin configuration handed to this component, replacing the {@code
   * PluginInfo.EMPTY_INFO} default.
   *
   * @param info the plugin configuration; stored as-is, without copying or validation
   */
  @Override
  public void init(PluginInfo info) {
    this.info = info;
  }

  /**
   * Marks the request for expansion when the {@code ExpandParams.EXPAND} parameter is true.
   *
   * <p>Nothing happens when the parameter is absent or false. Expansion and grouping are mutually
   * exclusive, so enabling both is rejected.
   *
   * @param rb the response builder for the current request; its {@code doExpand} flag is set
   * @throws SolrException with {@code BAD_REQUEST} if {@code GroupParams.GROUP} is also true
   */
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    final SolrParams requestParams = rb.req.getParams();

    // Guard: expansion was not requested, leave the builder untouched.
    if (!requestParams.getBool(ExpandParams.EXPAND, false)) {
      return;
    }

    // Guard: expansion cannot be combined with grouping.
    if (requestParams.getBool(GroupParams.GROUP, false)) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Can not use expand with Grouping enabled");
    }

    rb.doExpand = true;
  }

  /**
   * Expands the groups that appear on the current page of results and adds them to the response
   * under the key {@code "expanded"}.
   *
   * <p>The method works in these phases:
   *
   * <ol>
   *   <li>Resolve the expand field: {@code ExpandParams.EXPAND_FIELD} if given, otherwise the field
   *       of the lowest-cost {@code CollapsingPostFilter} among the request filters (first one wins
   *       on ties).
   *   <li>Resolve the query, filters, sort and per-group row limit used for the expansion.
   *   <li>Walk the documents of the current page (sorted by global doc id) and record which group
   *       values they carry: ordinals in {@code groupBits}/{@code ordBytes} for string fields, raw
   *       longs in {@code groupSet} for numeric fields.
   *   <li>When fewer than 200 distinct groups were found, build a query restricting the expansion
   *       search to those groups.
   *   <li>Run the search with a group collector and publish the collected groups.
   * </ol>
   *
   * <p>The method returns without adding anything to the response when expansion was not
   * requested, when the searcher has no leaf contexts, or when the query limits report that
   * processing should stop with partial results.
   *
   * @param rb the response builder for the current request
   * @throws SolrException with {@code BAD_REQUEST} if no expand field can be resolved, if {@code
   *     expand.q} / {@code expand.fq} cannot be parsed, or if the field is neither a {@code
   *     StrField} nor a numeric type
   * @throws IOException if reading doc values or searching fails
   */
  @Override
  public void process(ResponseBuilder rb) throws IOException {

    if (!rb.doExpand) {
      return;
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    // ---- Phase 1: resolve the field to expand on ----
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
      List<Query> filters = rb.getFilters();
      if (filters != null) {
        int cost = Integer.MAX_VALUE;
        for (Query q : filters) {
          if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
            CollapsingQParserPlugin.CollapsingPostFilter cp =
                (CollapsingQParserPlugin.CollapsingPostFilter) q;
            // if there are multiple collapse pick the low cost one
            // if cost are equal then first one is picked (strict '<' keeps the earlier filter)
            if (cp.getCost() < cost) {
              cost = cp.getCost();
              field = cp.getField();
              hint = cp.hint;
            }
          }
        }
      }
    }

    if (field == null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing expand field");
    }

    // ---- Phase 2: resolve sort, query, filters and row limit for the expansion ----
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
      sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    final Query query;
    List<Query> newFilters = new ArrayList<>();
    try {
      // expand.q, when present, replaces the main query
      if (qs == null) {
        query = rb.getQuery();
      } else {
        QParser parser = QParser.getParser(qs, req);
        query = parser.getQuery();
      }

      if (fqs == null) {
        // No expand.fq: reuse the main filters, minus the collapse filters themselves
        List<Query> filters = rb.getFilters();
        if (filters != null) {
          for (Query q : filters) {
            if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
              newFilters.add(q);
            }
          }
        }
      } else {
        // expand.fq replaces the main filters; blank and match-all entries are dropped
        for (String fq : fqs) {
          if (StrUtils.isNotBlank(fq) && !fq.equals("*:*")) {
            QParser fqp = QParser.getParser(fq, req);
            newFilters.add(fqp.getQuery());
          }
        }
      }
    } catch (SyntaxError e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();

    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();

    // Non-null only for string fields; a null value selects the numeric code path below
    SortedDocValues values = null;

    if (fieldType instanceof StrField) {
      // Get The Top Level SortedDocValues
      if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
        LeafReader uninvertingReader =
            CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
        values = uninvertingReader.getSortedDocValues(field);
      } else {
        values = DocValues.getSorted(reader, field);
      }
    } else if (fieldType.getNumberType() == null) {
      // possible if directly expand.field is specified
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "Expand not supported for fieldType:'" + fieldType.getTypeName() + "'");
    }

    // ---- Phase 3: gather the groups present on the current page ----
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);

    // Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
      globalDocs[++docsIndex] = idit.nextDoc();
    }

    // Ascending doc-id order lets the loops below move forward through segments and doc values
    Arrays.sort(globalDocs);
    Query groupQuery = null;

    /*
     * This code gathers the group information for the current page.
     */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();

    if (contexts.isEmpty()) {
      // When no context is available we can skip the expanding
      return;
    }
    QueryLimits queryLimits = QueryLimits.getCurrentLimits();
    if (queryLimits.maybeExitWithPartialResults("Expand process")) {
      return;
    }

    boolean nullGroupOnCurrentPage = false;
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    // docBase of the following segment, or MAX_VALUE when the current segment is the last one
    int nextDocBase =
        (currentContext + 1) < contexts.size()
            ? contexts.get(currentContext + 1).docBase
            : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
      // String field: groups are identified by top-level ordinals
      groupBits = new FixedBitSet(values.getValueCount());
      OrdinalMap ordinalMap = null;
      SortedDocValues[] sortedDocValues = null;
      LongValues segmentOrdinalMap = null;
      SortedDocValues currentValues = null;
      if (values instanceof MultiDocValues.MultiSortedDocValues) {
        // Multi-segment view: read per-segment values and map segment ords to global ords
        ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
        sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
        currentValues = sortedDocValues[currentContext];
        segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
      }

      ordBytes = new IntObjectHashMap<>();

      for (int i = 0; i < globalDocs.length; i++) {
        int globalDoc = globalDocs[i];
        // Move to the segment that contains globalDoc
        while (globalDoc >= nextDocBase) {
          currentContext++;
          currentDocBase = contexts.get(currentContext).docBase;
          nextDocBase =
              (currentContext + 1) < contexts.size()
                  ? contexts.get(currentContext + 1).docBase
                  : Integer.MAX_VALUE;
          if (ordinalMap != null) {
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
          }
        }
        collapsedSet.add(globalDoc);
        int contextDoc = globalDoc - currentDocBase;
        if (ordinalMap != null) {
          if (contextDoc > currentValues.docID()) {
            currentValues.advance(contextDoc);
          }
          if (contextDoc == currentValues.docID()) {
            int contextOrd = currentValues.ordValue();
            int ord = (int) segmentOrdinalMap.get(contextOrd);
            // Record the group's bytes only the first time its ordinal is seen
            if (!groupBits.getAndSet(ord)) {
              BytesRef ref = currentValues.lookupOrd(contextOrd);
              ordBytes.put(ord, BytesRef.deepCopyOf(ref));
            }
          } else {
            // The iterator landed past this doc: it has no value for the field
            nullGroupOnCurrentPage = true;
          }

        } else {
          // Single top-level doc values instance: it is addressed by the global doc id directly
          if (globalDoc > values.docID()) {
            values.advance(globalDoc);
          }
          if (globalDoc == values.docID()) {
            int ord = values.ordValue();
            if (!groupBits.getAndSet(ord)) {
              BytesRef ref = values.lookupOrd(ord);
              ordBytes.put(ord, BytesRef.deepCopyOf(ref));
            }
          } else {
            nullGroupOnCurrentPage = true;
          }
        }
      }

      // ---- Phase 4 (string): restrict the search to the page's groups when there are few ----
      int count = ordBytes.size();
      if (count > 0 && count < 200) {
        groupQuery = getGroupQuery(field, count, ordBytes);
      }
    } else {
      // Numeric field: groups are identified by their long value, read segment by segment
      groupSet = new LongHashSet(docList.size());
      NumericDocValues collapseValues =
          contexts.get(currentContext).reader().getNumericDocValues(field);
      for (int i = 0; i < globalDocs.length; i++) {
        int globalDoc = globalDocs[i];
        // Move to the segment that contains globalDoc
        while (globalDoc >= nextDocBase) {
          currentContext++;
          currentDocBase = contexts.get(currentContext).docBase;
          nextDocBase =
              currentContext + 1 < contexts.size()
                  ? contexts.get(currentContext + 1).docBase
                  : Integer.MAX_VALUE;
          collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        }
        collapsedSet.add(globalDoc);
        int contextDoc = globalDoc - currentDocBase;
        int valueDocID = collapseValues.docID();
        if (valueDocID < contextDoc) {
          valueDocID = collapseValues.advance(contextDoc);
        }
        if (valueDocID == contextDoc) {
          final long value = collapseValues.longValue();
          groupSet.add(value);
        } else {
          // The iterator landed past this doc: it has no value for the field
          nullGroupOnCurrentPage = true;
        }
      }

      // ---- Phase 4 (numeric): restrict the search to the page's groups when there are few ----
      int count = groupSet.size();
      if (count > 0 && count < 200) {
        if (fieldType.isPointField()) {
          groupQuery = getPointGroupQuery(schemaField, count, groupSet);
        } else {
          groupQuery = getGroupQuery(field, fieldType, count, groupSet);
        }
      }
    }

    // NOTE(review): the comment below talks about expand.q being specified, but the condition
    // tests 'query', which is also assigned (from rb.getQuery()) when expand.q is absent.
    // Confirm whether 'qs' was the intended operand; behavior is left unchanged here.
    final boolean expandNullGroup =
        params.getBool(ExpandParams.EXPAND_NULL, false)
            &&
            // Our GroupCollector can typically ignore nulls (and the user's nullGroup param) unless
            // the current page had any - but if expand.q was specified, current page doesn't mater:
            // We need look for nulls if the user asked us to because we don't know what the
            // expand.q will match
            (nullGroupOnCurrentPage || (null != query));

    if (expandNullGroup && null != groupQuery) {
      // we need to also consider docs w/o a field value:
      // groupQuery := (docs without a value for the field) OR (original groupQuery)
      final BooleanQuery.Builder inner = new BooleanQuery.Builder();
      inner.add(fieldType.getExistenceQuery(null, schemaField), BooleanClause.Occur.MUST_NOT);
      inner.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
      final BooleanQuery.Builder outer = new BooleanQuery.Builder();
      outer.add(inner.build(), BooleanClause.Occur.SHOULD);
      outer.add(groupQuery, BooleanClause.Occur.SHOULD);
      groupQuery = outer.build();
    }

    // ---- Phase 5: run the expansion search and publish the groups ----
    Collector collector;
    if (sort != null) sort = sort.rewrite(searcher);

    GroupCollector groupExpandCollector = null;

    if (values != null) {
      // Get The Top Level SortedDocValues again so we can re-iterate:
      if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
        LeafReader uninvertingReader =
            CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
        values = uninvertingReader.getSortedDocValues(field);
      } else {
        values = DocValues.getSorted(reader, field);
      }

      groupExpandCollector =
          new GroupExpandCollector(
              limit,
              sort,
              query,
              expandNullGroup,
              fieldType,
              ordBytes,
              values,
              groupBits,
              collapsedSet);
    } else {
      // ordBytes is still null on the numeric path
      groupExpandCollector =
          new NumericGroupExpandCollector(
              limit,
              sort,
              query,
              expandNullGroup,
              fieldType,
              ordBytes,
              field,
              groupSet,
              collapsedSet);
    }

    if (groupQuery != null) {
      // Limits the results to documents that are in the same group as the documents in the page.
      newFilters.add(groupQuery);
    }

    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
      // Post filters wrap the collector chain, so the group collector becomes their last delegate
      pfilter.postFilter.setLastDelegate(groupExpandCollector);
      collector = pfilter.postFilter;
    } else {
      collector = groupExpandCollector;
    }

    searcher.search(QueryUtils.combineQueryAndFilter(query, pfilter.filter), collector);
    if (queryLimits.maybeExitWithPartialResults("Expand expand")) {
      return;
    }

    rb.rsp.add("expanded", groupExpandCollector.getGroups(searcher, rb.rsp.getReturnFields()));
  }

  /**
   * Reports the next distributed stage this component needs.
   *
   * @param rb the response builder for the current request
   * @return {@code finishingStage} while expansion is enabled and the request has not yet reached
   *     that stage; {@code ResponseBuilder.STAGE_DONE} otherwise
   */
  @Override
  public int distributedProcess(ResponseBuilder rb) throws IOException {
    final boolean expandStillPending = rb.doExpand && rb.stage < finishingStage;
    return expandStillPending ? finishingStage : ResponseBuilder.STAGE_DONE;
  }

  /**
   * Switches the expand parameter on or off for an outgoing shard request.
   *
   * <p>Does nothing when the incoming request did not enable expand. Otherwise the shard request
   * gets {@code expand=true} if this is a one-pass distributed query or the shard request has the
   * {@code PURPOSE_GET_FIELDS} purpose bit set, and {@code expand=false} in every other case.
   *
   * @param rb the response builder for the current request
   * @param who the component that created the shard request (not consulted here)
   * @param sreq the shard request whose parameters are adjusted
   */
  @Override
  public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
    final boolean expandRequested = rb.req.getParams().getBool(COMPONENT_NAME, false);
    if (expandRequested) {
      final boolean fetchesFields = (sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0;
      final String shardValue = (rb.onePassDistributedQuery || fetchesFields) ? "true" : "false";
      sreq.params.set(COMPONENT_NAME, shardValue);
    }
  }

  @Override
  public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {

    if (!rb.doExpand) {
      return;
    }
    if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
      SolrQueryRequest req = rb.req;
      @SuppressWarnings("unchecked")
      NamedList