All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.client.solrj.response.AnalysisResponseBase Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.client.solrj.response;

import org.apache.solr.common.util.NamedList;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * A base class for all analysis responses.
 *
 *
 * @since solr 1.4
 */
public class AnalysisResponseBase extends SolrResponseBase {

  /**
   * Parses the given named list and builds a list of analysis phases form it. Expects a named list of the form:
   * 
*

   *  <lst name="index">
   *      <arr name="Tokenizer">
   *          <str name="text">the_text</str>
   *          <str name="rawText">the_raw_text</str> (optional)
   *          <str name="type">the_type</str>
   *          <int name="start">1</str>
   *          <int name="end">3</str>
   *          <int name="position">1</str>
   *          <bool name="match">true | false</bool> (optional)
   *      </arr>
   *      <arr name="Filter1">
   *          <str name="text">the_text</str>
   *          <str name="rawText">the_raw_text</str> (optional)
   *          <str name="type">the_type</str>
   *          <int name="start">1</str>
   *          <int name="end">3</str>
   *          <int name="position">1</str>
   *          <bool name="match">true | false</bool> (optional)
   *      </arr>
   *      ...
   *  </lst>
   * 
* * @param phaseNL The names list to parse. * * @return The built analysis phases list. */ protected List buildPhases(NamedList>> phaseNL) { List phases = new ArrayList<>(phaseNL.size()); for (Map.Entry>> phaseEntry : phaseNL) { AnalysisPhase phase = new AnalysisPhase(phaseEntry.getKey()); List> tokens = phaseEntry.getValue(); for (NamedList token : tokens) { TokenInfo tokenInfo = buildTokenInfo(token); phase.addTokenInfo(tokenInfo); } phases.add(phase); } return phases; } /** * Parses the given named list and builds a token infoform it. Expects a named list of the form: *
*

   *  <arr name="Tokenizer">
   *      <str name="text">the_text</str>
   *      <str name="rawText">the_raw_text</str> (optional)
   *      <str name="type">the_type</str>
   *      <int name="start">1</str>
   *      <int name="end">3</str>
   *      <int name="position">1</str>
   *      <bool name="match">true | false</bool> (optional)
   *  </arr>
   * 
* * @param tokenNL The named list to parse. * * @return The built token info. */ protected TokenInfo buildTokenInfo(NamedList tokenNL) { String text = (String) tokenNL.get("text"); String rawText = (String) tokenNL.get("rawText"); String type = (String) tokenNL.get("type"); int start = (Integer) tokenNL.get("start"); int end = (Integer) tokenNL.get("end"); int position = (Integer) tokenNL.get("position"); Boolean match = (Boolean) tokenNL.get("match"); return new TokenInfo(text, rawText, type, start, end, position, (match == null ? false : match)); } //================================================= Inner Classes ================================================== /** * A phase in the analysis process. The phase holds the tokens produced in this phase and the name of the class that * produced them. */ public static class AnalysisPhase { private final String className; private List tokens = new ArrayList<>(); AnalysisPhase(String className) { this.className = className; } /** * The name of the class (analyzer, tokenzier, or filter) that produced the token stream for this phase. * * @return The name of the class that produced the token stream for this phase. */ public String getClassName() { return className; } private void addTokenInfo(TokenInfo tokenInfo) { tokens.add(tokenInfo); } /** * Returns a list of tokens which represent the token stream produced in this phase. * * @return A list of tokens which represent the token stream produced in this phase. */ public List getTokens() { return tokens; } } /** * Holds all information of a token as part of an analysis phase. */ public static class TokenInfo { private final String text; private final String rawText; private final String type; private final int start; private final int end; private final int position; private final boolean match; /** * Constructs a new TokenInfo. * * @param text The text of the token * @param rawText The raw text of the token. If the token is stored in the index in a special format (e.g. 
* dates or padded numbers) this argument should hold this value. If the token is stored as is, * then this value should be {@code null}. * @param type The type fo the token (typically either {@code word} or {@code } though it depends * on the tokenizer/filter used). * @param start The start position of the token in the original text where it was extracted from. * @param end The end position of the token in the original text where it was extracted from. * @param position The position of the token within the token stream. * @param match Indicates whether this token matches one of the the query tokens. */ TokenInfo(String text, String rawText, String type, int start, int end, int position, boolean match) { this.text = text; this.rawText = rawText; this.type = type; this.start = start; this.end = end; this.position = position; this.match = match; } /** * Returns the text of the token. * * @return The text of the token. */ public String getText() { return text; } /** * Returns the raw text of the token. If the token is index in a special format (e.g. date or paddded numbers) * it will be returned as the raw text. Returns {@code null} if the token is indexed as is. * * @return Returns the raw text of the token. */ public String getRawText() { return rawText; } /** * Returns the type of the token. Typically this will be {@code word} or {@code }, but it really * depends on the tokenizer and filters that are used. * * @return The type of the token. */ public String getType() { return type; } /** * Returns the start position of this token within the text it was originally extracted from. * * @return The start position of this token within the text it was originally extracted from. */ public int getStart() { return start; } /** * Returns the end position of this token within the text it was originally extracted from. * * @return The end position of this token within the text it was originally extracted from. 
*/ public int getEnd() { return end; } /** * Returns the position of this token within the produced token stream. * * @return The position of this token within the produced token stream. */ public int getPosition() { return position; } /** * Returns whether this token matches one of the query tokens (if query analysis is performed). * * @return Whether this token matches one of the query tokens (if query analysis is performed). */ public boolean isMatch() { return match; } } }