org.apache.solr.handler.FieldAnalysisRequestHandler Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation

Apache Solr (module: core)

There is a newer version: 9.7.0

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler;

import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.security.AuthorizationContext;

/**
 * Provides the ability to specify multiple field types and field names in the same request.
 * Expected parameters:
 *
 * <table>
 * <caption>table of parameters</caption>
 * <tr>
 * <th>Name</th>
 * <th>Type</th>
 * <th>required</th>
 * <th>Description</th>
 * <th>Multi-valued</th>
 * </tr>
 * <tr>
 * <td>analysis.fieldname</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text will be analyzed based on the type of this field name.</td>
 * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be
 * performed for each of the specified fields</td>
 * </tr>
 * <tr>
 * <td>analysis.fieldtype</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text will be analyzed based on the specified type</td>
 * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be
 * performed for each of the specified field types</td>
 * </tr>
 * <tr>
 * <td>analysis.fieldvalue</td>
 * <td>string</td>
 * <td>no</td>
 * <td>The text that will be analyzed. The analysis will mimic the index-time analysis.</td>
 * <td>No</td>
 * </tr>
 * <tr>
 * <td>{@code analysis.query} OR {@code q}</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text that will be analyzed. The analysis will mimic the query-time
 * analysis. Note that the {@code analysis.query} parameter takes precedence over the {@code q}
 * parameter.</td>
 * <td>No</td>
 * </tr>
 * <tr>
 * <td>analysis.showmatch</td>
 * <td>boolean</td>
 * <td>no</td>
 * <td>When set to {@code true} and when query analysis is performed, the produced tokens of the
 * field value analysis will be marked as "matched" for every token that is produced by the query
 * analysis</td>
 * <td>No</td>
 * </tr>
 * </table>
 *
 * Note that if neither analysis.fieldname nor analysis.fieldtype is specified, the field named by
 * the {@code df} parameter is analyzed instead; if {@code df} is absent as well, the request is
 * rejected as a bad request.
 *
 * Note that at least one of analysis.fieldvalue, analysis.query or q must be specified.
 *
 * @since solr 1.4
 */
public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {

  @Override
  @SuppressWarnings({"rawtypes"})
  protected NamedList doAnalysis(SolrQueryRequest req) throws Exception {
    FieldAnalysisRequest analysisRequest = resolveAnalysisRequest(req);
    IndexSchema indexSchema = req.getSchema();
    return handleAnalysisRequest(analysisRequest, indexSchema);
  }

  @Override
  public String getDescription() {
    return "Provide a breakdown of the analysis process of field/query text";
  }

  /**
   * Resolves the AnalysisRequest based on the parameters in the given SolrParams.
   *
   * @param req the request
   * @return AnalysisRequest containing all the information about what needs to be analyzed, and
   *     using what fields/types
   */
  FieldAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws SolrException {
    SolrParams solrParams = req.getParams();
    FieldAnalysisRequest analysisRequest = new FieldAnalysisRequest();

    boolean useDefaultSearchField = true;
    if (solrParams.get(AnalysisParams.FIELD_TYPE) != null) {
      analysisRequest.setFieldTypes(
          Arrays.asList(solrParams.get(AnalysisParams.FIELD_TYPE).split(",")));
      useDefaultSearchField = false;
    }
    if (solrParams.get(AnalysisParams.FIELD_NAME) != null) {
      analysisRequest.setFieldNames(
          Arrays.asList(solrParams.get(AnalysisParams.FIELD_NAME).split(",")));
      useDefaultSearchField = false;
    }
    if (useDefaultSearchField) {
      if (solrParams.get(CommonParams.DF) != null) {
        analysisRequest.addFieldName(solrParams.get(CommonParams.DF));
      } else {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "Field analysis request must contain one of analysis.fieldtype, analysis.fieldname or df.");
      }
    }
    analysisRequest.setQuery(solrParams.get(AnalysisParams.QUERY, solrParams.get(CommonParams.Q)));

    String value = solrParams.get(AnalysisParams.FIELD_VALUE);
    if (analysisRequest.getQuery() == null && value == null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "One of analysis.fieldvalue, q, or analysis.query parameters must be specified");
    }

    Iterable streams = req.getContentStreams();
    if (streams != null) {
      // NOTE: Only the first content stream is currently processed
      for (ContentStream stream : streams) {
        try {
          value = StrUtils.stringFromReader(stream.getReader());
        } catch (IOException e) {
          // do nothing, leave value set to the request parameter
        }
        break;
      }
    }

    analysisRequest.setFieldValue(value);
    analysisRequest.setShowMatch(solrParams.getBool(AnalysisParams.SHOW_MATCH, false));
    return analysisRequest;
  }

  /**
   * Handles the resolved analysis request and returns the analysis breakdown response as a named
   * list.
   *
   * @param request The request to handle.
   * @param schema The index schema.
   * @return The analysis breakdown as a named list.
   */
  @SuppressWarnings({"rawtypes"})
  protected NamedList handleAnalysisRequest(
      FieldAnalysisRequest request, IndexSchema schema) {
    NamedList analysisResults = new SimpleOrderedMap<>();

    NamedList fieldTypeAnalysisResults = new SimpleOrderedMap<>();
    if (request.getFieldTypes() != null) {
      for (String fieldTypeName : request.getFieldTypes()) {
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        fieldTypeAnalysisResults.add(fieldTypeName, analyzeValues(request, fieldType, null));
      }
    }

    NamedList fieldNameAnalysisResults = new SimpleOrderedMap<>();
    if (request.getFieldNames() != null) {
      for (String fieldName : request.getFieldNames()) {
        FieldType fieldType = schema.getFieldType(fieldName);
        fieldNameAnalysisResults.add(fieldName, analyzeValues(request, fieldType, fieldName));
      }
    }

    analysisResults.add("field_types", fieldTypeAnalysisResults);
    analysisResults.add("field_names", fieldNameAnalysisResults);

    return analysisResults;
  }

  /**
   * Analyzes the index value (if it exists) and the query value (if it exists) in the given
   * AnalysisRequest, using the Analyzers of the given field type.
   *
   * @param analysisRequest AnalysisRequest from where the index and query values will be taken
   * @param fieldType Type of field whose analyzers will be used
   * @param fieldName Name of the field to be analyzed. Can be {@code null}
   * @return NamedList containing the tokens produced by the analyzers of the given field, separated
   *     into an index and a query group
   */
  // package access for testing
  @SuppressWarnings({"rawtypes"})
  NamedList analyzeValues(
      FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {

    final String queryValue = analysisRequest.getQuery();
    final Set termsToMatch =
        (queryValue != null && analysisRequest.isShowMatch())
            ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
            : EMPTY_BYTES_SET;

    NamedList analyzeResults = new SimpleOrderedMap<>();
    if (analysisRequest.getFieldValue() != null) {
      AnalysisContext context =
          new AnalysisContext(fieldName, fieldType, fieldType.getIndexAnalyzer(), termsToMatch);
      NamedList analyzedTokens = analyzeValue(analysisRequest.getFieldValue(), context);
      analyzeResults.add("index", analyzedTokens);
    }
    if (analysisRequest.getQuery() != null) {
      AnalysisContext context =
          new AnalysisContext(fieldName, fieldType, fieldType.getQueryAnalyzer());
      NamedList analyzedTokens = analyzeValue(analysisRequest.getQuery(), context);
      analyzeResults.add("query", analyzedTokens);
    }

    return analyzeResults;
  }

  @Override
  public Name getPermissionName(AuthorizationContext request) {
    return Name.READ_PERM;
  }
}

Name	Type	required	Description	Multi-valued
analysis.fieldname	string	no	When present, the text will be analyzed based on the type of this field name.	Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified fields
analysis.fieldtype	string	no	When present, the text will be analyzed based on the specified type	Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified field types
analysis.fieldvalue	string	no	The text that will be analyzed. The analysis will mimic the index-time analysis.	No
{@code analysis.query} OR {@code q}	string	no	When present, the text that will be analyzed. The analysis will mimic the query-time analysis. Note that the {@code analysis.query} parameter takes precedence over the {@code q} parameter.	No
analysis.showmatch	boolean	no	When set to {@code true} and when query analysis is performed, the produced tokens of the field value analysis will be marked as "matched" for every token that is produced by the query analysis	No