org.apache.solr.handler.MoreLikeThisHandler Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.api.AnnotatedApi;
import org.apache.solr.api.Api;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.MoreLikeThisParams.TermStyle;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.CollectionUtil;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.handler.admin.api.MoreLikeThisAPI;
import org.apache.solr.handler.component.FacetComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryLimits;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.util.SolrPluginUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Solr MoreLikeThis request handler.
 *
 * <p>Returns documents similar to either a single matched document or posted text.
 *
 * @since solr 1.3
 */
public class MoreLikeThisHandler extends RequestHandlerBase {
  // Pattern is thread safe -- TODO? share this with general 'fl' param
  // Matches one delimiter: either a single comma or a single space.
  // NOTE(review): its use is not visible in this part of the file -- presumably it tokenizes a
  // comma/space separated field list; verify against the callers.
  private static final Pattern splitList = Pattern.compile(",| ");

  // Class logger. MethodHandles.lookup().lookupClass() resolves to the enclosing class, so the
  // logger name follows the class without hard-coding it.
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  // Message of the BAD_REQUEST SolrException thrown by handleRequestBody when the request has
  // no 'q' parameter and no content stream supplied a Reader.
  // Package-private (no access modifier) -- presumably so tests can reference it; TODO confirm.
  static final String ERR_MSG_QUERY_OR_TEXT_REQUIRED =
      "MoreLikeThis requires either a query (?q=) or text to find similar documents.";

  // Message of the BAD_REQUEST SolrException thrown by handleRequestBody when 'q' is missing or
  // blank and the request carries more than one content stream.
  static final String ERR_MSG_SINGLE_STREAM_ONLY =
      "MoreLikeThis does not support multiple ContentStreams";

  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();

    try {

      // Set field flags
      ReturnFields returnFields = new SolrReturnFields(req);
      rsp.setReturnFields(returnFields);
      int flags = 0;
      if (returnFields.wantsScore()) {
        flags |= SolrIndexSearcher.GET_SCORES;
      }

      String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
      String q = params.get(CommonParams.Q);
      Query query = null;
      SortSpec sortSpec = null;
      List filters = null;

      try {
        if (q != null) {
          QParser parser = QParser.getParser(q, defType, req);
          query = parser.getQuery();
          sortSpec = parser.getSortSpec(true);
        }

        filters = QueryUtils.parseFilterQueries(req);
      } catch (SyntaxError e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }

      SolrIndexSearcher searcher = req.getSearcher();

      MoreLikeThisHelper mlt = new MoreLikeThisHelper(params, searcher);

      // Hold on to the interesting terms if relevant
      TermStyle termStyle = TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));

      DocListAndSet mltDocs = null;

      // Parse Required Params
      // This will either have a single Reader or valid query
      Reader reader = null;
      try {
        if (q == null || q.trim().length() < 1) {
          Iterable streams = req.getContentStreams();
          if (streams != null) {
            Iterator iter = streams.iterator();
            if (iter.hasNext()) {
              reader = iter.next().getReader();
            }
            if (iter.hasNext()) {
              throw new SolrException(
                  SolrException.ErrorCode.BAD_REQUEST, ERR_MSG_SINGLE_STREAM_ONLY);
            }
          }
        }

        int start = params.getInt(CommonParams.START, CommonParams.START_DEFAULT);
        int rows = params.getInt(CommonParams.ROWS, CommonParams.ROWS_DEFAULT);

        // Find documents MoreLikeThis - either with a reader or a query
        // --------------------------------------------------------------------------------
        if (reader != null) {
          mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters, flags);
        } else if (q != null) {
          // Matching options
          boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
          int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
          // Find the base match
          DocList match =
              searcher.getDocList(
                  query, null, null, matchOffset, 1, flags); // only get the first one...
          if (includeMatch) {
            rsp.add("match", match);
          }

          // This is an iterator, but we only handle the first match
          DocIterator iterator = match.iterator();
          if (iterator.hasNext()) {
            // do a MoreLikeThis query for each document in results
            int id = iterator.nextDoc();
            mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, flags);
          }
        } else {
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, ERR_MSG_QUERY_OR_TEXT_REQUIRED);
        }

      } finally {
        if (reader != null) {
          reader.close();
        }
      }

      if (mltDocs == null) {
        mltDocs = new DocListAndSet(); // avoid NPE
      }
      rsp.addResponse(mltDocs.docList);

      if (termStyle != TermStyle.NONE) {
        final List interesting =
            mlt.getInterestingTerms(mlt.getBoostedMLTQuery(), mlt.mlt.getMaxQueryTerms());
        if (termStyle == TermStyle.DETAILS) {
          NamedList it = new NamedList<>();
          for (InterestingTerm t : interesting) {
            it.add(t.term.toString(), t.boost);
          }
          rsp.add("interestingTerms", it);
        } else {
          List it = new ArrayList<>(interesting.size());
          for (InterestingTerm t : interesting) {
            it.add(t.term.text());
          }
          rsp.add("interestingTerms", it);
        }
      }

      // maybe facet the results
      if (params.getBool(FacetParams.FACET, false)) {
        if (mltDocs.docSet == null) {
          rsp.add("facet_counts", null);
        } else {
          final ResponseBuilder responseBuilder =
              new ResponseBuilder(req, rsp, Collections.emptyList());
          responseBuilder.setQuery(mlt.getRealMLTQuery());
          SimpleFacets f = new SimpleFacets(req, mltDocs.docSet, params, responseBuilder);
          FacetComponent.FacetContext.initContext(responseBuilder);
          rsp.add("facet_counts", FacetComponent.getFacetCounts(f));
        }
      }
      boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);

      boolean dbgQuery = false, dbgResults = false;
      if (dbg == false) { // if it's true, we are doing everything anyway.
        String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
        if (dbgParams != null) {
          for (String dbgParam : dbgParams) {
            if (dbgParam.equals(CommonParams.QUERY)) {
              dbgQuery = true;
            } else if (dbgParam.equals(CommonParams.RESULTS)) {
              dbgResults = true;
            }
          }
        }
      } else {
        dbgQuery = true;
        dbgResults = true;
      }
      // TODO resolve duplicated code with DebugComponent.  Perhaps it should be added to
      // doStandardDebug?
      if (dbg == true) {
        try {
          NamedList