org.apache.solr.handler.DocumentAnalysisRequestHandler Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr Server
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler;

import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.util.*;

/**
 * An analysis handler that provides a breakdown of the analysis process of provided docuemnts. This handler expects a
 * (single) content stream of the following format:
 * 
 * 

 *  <docs>
 *      <doc>
 *          <field name="id">1</field>
 *          <field name="name">The Name</field>
 *          <field name="text">The Text Value</field>
 *      <doc>
 *      <doc>...</doc>
 *      <doc>...</doc>
 *      ...
 *  </docs>
 * 
 * 
 * Note: Each document must contain a field which serves as the unique key. This key is used in the returned
 * response to assoicate an analysis breakdown to the analyzed document.
 * 

 * 

 * 

 * Like the {@link org.apache.solr.handler.FieldAnalysisRequestHandler}, this handler also supports query analysis by
 * sending either an "analysis.query" or "q" request paraemter that holds the query text to be analyzed. It also
 * supports the "analysis.showmatch" parameter which when set to {@code true}, all field tokens that match the query
 * tokens will be marked as a "match".
 *
 * @version $Id: DocumentAnalysisRequestHandler.java 824333 2009-10-12 13:40:27Z ehatcher $
 * @since solr 1.4
 */
public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase {

  public static final Logger log = LoggerFactory.getLogger(DocumentAnalysisRequestHandler.class);

  private static final float DEFAULT_BOOST = 1.0f;

  private XMLInputFactory inputFactory;

  @Override
  public void init(NamedList args) {
    super.init(args);

    inputFactory = XMLInputFactory.newInstance();
    try {
      // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
      // XMLInputFactory, as that implementation tries to cache and reuse the
      // XMLStreamReader.  Setting the parser-specific "reuse-instance" property to false
      // prevents this.
      // All other known open-source stax parsers (and the bea ref impl)
      // have thread-safe factories.
      inputFactory.setProperty("reuse-instance", Boolean.FALSE);
    } catch (IllegalArgumentException ex) {
      // Other implementations will likely throw this exception since "reuse-instance"
      // isimplementation specific.
      log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
    }
  }

  /**
   * {@inheritDoc}
   */
  protected NamedList doAnalysis(SolrQueryRequest req) throws Exception {
    DocumentAnalysisRequest analysisRequest = resolveAnalysisRequest(req);
    return handleAnalysisRequest(analysisRequest, req.getSchema());
  }

  @Override
  public String getDescription() {
    return "Provides a breakdown of the analysis process of provided documents";
  }

  @Override
  public String getVersion() {
    return "$Revision: 824333 $";
  }

  @Override
  public String getSourceId() {
    return "$Id: DocumentAnalysisRequestHandler.java 824333 2009-10-12 13:40:27Z ehatcher $";
  }

  @Override
  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java $";
  }


  //================================================ Helper Methods ==================================================

  /**
   * Resolves the {@link DocumentAnalysisRequest} from the given solr request.
   *
   * @param req The solr request.
   *
   * @return The resolved document analysis request.
   *
   * @throws IOException        Thrown when reading/parsing the content stream of the request fails.
   * @throws XMLStreamException Thrown when reading/parsing the content stream of the request fails.
   */
  DocumentAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws IOException, XMLStreamException {

    DocumentAnalysisRequest request = new DocumentAnalysisRequest();

    SolrParams params = req.getParams();

    String query = params.get(AnalysisParams.QUERY, params.get(CommonParams.Q, null));
    request.setQuery(query);

    boolean showMatch = params.getBool(AnalysisParams.SHOW_MATCH, false);
    request.setShowMatch(showMatch);

    ContentStream stream = extractSingleContentStream(req);
    Reader reader = stream.getReader();
    XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);

    try {

      while (true) {
        int event = parser.next();
        switch (event) {
          case XMLStreamConstants.END_DOCUMENT: {
            parser.close();
            return request;
          }
          case XMLStreamConstants.START_ELEMENT: {
            String currTag = parser.getLocalName();
            if ("doc".equals(currTag)) {
              log.trace("Reading doc...");
              SolrInputDocument document = readDocument(parser, req.getSchema());
              request.addDocument(document);
            }
            break;
          }
        }
      }

    } finally {
      parser.close();
      IOUtils.closeQuietly(reader);
    }
  }

  /**
   * Handles the resolved {@link DocumentAnalysisRequest} and returns the analysis response as a named list.
   *
   * @param request The {@link DocumentAnalysisRequest} to be handled.
   * @param schema  The index schema.
   *
   * @return The analysis response as a named list.
   */
  NamedList