// Source: org.apache.solr.handler.AnalysisRequestHandlerBase (Apache Solr 1.4)
// NOTE(review): the two lines above the license were Maven-repository page residue
// ("Maven / Gradle / Ivy", "The newest version!") and have been commented out so the file parses.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.analysis.TokenizerFactory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
/**
* A base class for all analysis request handlers.
*
* @version $Id: AnalysisRequestHandlerBase.java 827032 2009-10-20 11:01:47Z koji $
* @since solr 1.4
*/
public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
/**
 * Handles an analysis request by delegating to {@link #doAnalysis(SolrQueryRequest)}
 * and attaching the produced result to the response under the "analysis" key.
 *
 * @param req The solr request.
 * @param rsp The solr response the analysis result is written to.
 *
 * @throws Exception When the concrete analysis implementation fails.
 */
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  NamedList analysis = doAnalysis(req);
  rsp.add("analysis", analysis);
}
/**
 * Performs the analysis based on the given solr request and returns the analysis result as a
 * named list. Implemented by concrete handlers (e.g. field- or document-analysis handlers);
 * the result is what {@code handleRequestBody} places under the "analysis" response key.
 *
 * @param req The solr request.
 *
 * @return The analysis result as a named list.
 *
 * @throws Exception When analysis fails.
 */
protected abstract NamedList doAnalysis(SolrQueryRequest req) throws Exception;
/**
* Analyzes the given value using the given Analyzer.
*
* @param value Value to analyze
* @param context The {@link AnalysisContext analysis context}.
*
* @return NamedList containing the tokens produced by analyzing the given value
*/
protected NamedList> analyzeValue(String value, AnalysisContext context) {
Analyzer analyzer = context.getAnalyzer();
if (!TokenizerChain.class.isInstance(analyzer)) {
TokenStream tokenStream = null;
try {
tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
tokenStream.reset();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
NamedList> namedList = new NamedList>();
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
return namedList;
}
TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();
NamedList> namedList = new NamedList>();
if( cfiltfacs != null ){
String source = value;
for(CharFilterFactory cfiltfac : cfiltfacs ){
CharStream reader = CharReader.get(new StringReader(source));
reader = cfiltfac.create(reader);
source = writeCharStream(namedList, reader);
}
}
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
List tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
List tokenList = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
listBasedTokenStream = new ListBasedTokenStream(tokenList);
}
return namedList;
}
/**
 * Analyzes the given text using the given analyzer and returns the produced tokens.
 *
 * <p>NOTE(review): the generic return type had been stripped by an HTML extraction; it is
 * restored here as {@code List<Token>} to match {@code analyzeTokenStream}. The logic is
 * unchanged. The empty string is passed as the field name, so field-dependent analyzers
 * should use the {@code AnalysisContext} overload instead.
 *
 * @param value    The value to analyze.
 * @param analyzer The analyzer to use.
 *
 * @return The produced token list.
 */
protected List<Token> analyzeValue(String value, Analyzer analyzer) {
  TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value));
  return analyzeTokenStream(tokenStream);
}
/**
 * Analyzes the given TokenStream, collecting the Tokens it produces.
 *
 * <p>NOTE(review): the generic types had been stripped by an HTML extraction; they are
 * restored here as {@code List<Token>} / {@code ArrayList<Token>}. The logic is unchanged.
 *
 * @param tokenStream TokenStream to analyze.
 *
 * @return List of tokens produced from the TokenStream.
 *
 * @throws RuntimeException (wrapping the underlying IOException) when reading from the
 *                          stream fails.
 */
private List<Token> analyzeTokenStream(TokenStream tokenStream) {
  List<Token> tokens = new ArrayList<Token>();
  Token reusableToken = new Token();
  Token token = null;
  try {
    // next(Token) may return the reusable token itself, so clone before storing.
    while ((token = tokenStream.next(reusableToken)) != null) {
      tokens.add((Token) token.clone());
    }
  } catch (IOException ioe) {
    throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
  }
  return tokens;
}
/**
* Converts the list of Tokens to a list of NamedLists representing the tokens.
*
* @param tokens Tokens to convert
* @param context The analysis context
*
* @return List of NamedLists containing the relevant information taken from the tokens
*/
private List convertTokensToNamedLists(List tokens, AnalysisContext context) {
List tokensNamedLists = new ArrayList();
Collections.sort(tokens, new Comparator() {
public int compare(Token o1, Token o2) {
return o1.endOffset() - o2.endOffset();
}
});
int position = 0;
FieldType fieldType = context.getFieldType();
for (Token token : tokens) {
NamedList