org.apache.solr.highlight.DefaultSolrHighlighter Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized;
/**
*
* @since solr 1.3
*/
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{
private SolrCore solrCore;
public DefaultSolrHighlighter() {
}
public DefaultSolrHighlighter(SolrCore solrCore) {
this.solrCore = solrCore;
}
public void init(PluginInfo info) {
formatters.clear();
fragmenters.clear();
SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null);
if (frag == null) frag = new GapFragmenter();
fragmenters.put("", frag);
fragmenters.put(null, frag);
// Load the formatters
SolrFormatter fmt = solrCore.initPlugins(info.getChildren("formatter"), formatters,SolrFormatter.class,null);
if (fmt == null) fmt = new HtmlFormatter();
formatters.put("", fmt);
formatters.put(null, fmt);
initialized = true;
}
//just for back-compat with the deprecated method
private boolean initialized = false;
@Deprecated
public void initalize( SolrConfig config) {
if (initialized) return;
SolrFragmenter frag = new GapFragmenter();
fragmenters.put("", frag);
fragmenters.put(null, frag);
SolrFormatter fmt = new HtmlFormatter();
formatters.put("", fmt);
formatters.put(null, fmt);
}
/**
* Return a phrase Highlighter appropriate for this field.
* @param query The current Query
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
* @param tokenStream document text CachingTokenStream
* @throws IOException
*/
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream) throws IOException {
SolrParams params = request.getParams();
Highlighter highlighter = null;
highlighter = new Highlighter(getFormatter(fieldName, params), getSpanQueryScorer(query, fieldName, tokenStream, request));
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
/**
* Return a Highlighter appropriate for this field.
* @param query The current Query
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
*/
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
SolrParams params = request.getParams();
Highlighter highlighter = new Highlighter(
getFormatter(fieldName, params),
getQueryScorer(query, fieldName, request));
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
/**
* Return a SpanScorer suitable for this Query and field.
* @param query The current query
* @param tokenStream document text CachingTokenStream
* @param fieldName The name of the field
* @param request The SolrQueryRequest
* @throws IOException
*/
private QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) throws IOException {
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
Boolean highlightMultiTerm = request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true);
if(highlightMultiTerm == null) {
highlightMultiTerm = false;
}
QueryScorer scorer;
if (reqFieldMatch) {
scorer = new QueryScorer(query, fieldName);
}
else {
scorer = new QueryScorer(query, null);
}
scorer.setExpandMultiTermQuery(highlightMultiTerm);
return scorer;
}
/**
* Return a QueryScorer suitable for this Query and field.
* @param query The current query
* @param fieldName The name of the field
* @param request The SolrQueryRequest
*/
private Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
if (reqFieldMatch) {
return new QueryTermScorer(query, request.getSearcher().getReader(), fieldName);
}
else {
return new QueryTermScorer(query);
}
}
/**
* Return the max number of snippets for this field. If this has not
* been configured for this field, fall back to the configured default
* or the solr default.
* @param fieldName The name of the field
* @param params The params controlling Highlighting
*/
protected int getMaxSnippets(String fieldName, SolrParams params) {
return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
}
/**
* Return whether adjacent fragments should be merged.
* @param fieldName The name of the field
* @param params The params controlling Highlighting
*/
protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
}
/**
* Return a formatter appropriate for this field. If a formatter
* has not been configured for this field, fall back to the configured
* default or the solr default (SimpleHTMLFormatter).
*
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate Formatter.
*/
protected Formatter getFormatter(String fieldName, SolrParams params )
{
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
SolrFormatter formatter = formatters.get( str );
if( formatter == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
}
return formatter.getFormatter( fieldName, params );
}
/**
* Return a fragmenter appropriate for this field. If a fragmenter
* has not been configured for this field, fall back to the configured
* default or the solr default (GapFragmenter).
*
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate Fragmenter.
*/
protected Fragmenter getFragmenter(String fieldName, SolrParams params)
{
String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
SolrFragmenter frag = fragmenters.get( fmt );
if( frag == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
}
return frag.getFragmenter( fieldName, params );
}
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.
*
* @param docs query results
* @param query the query
* @param req the current request
* @param defaultFields default list of fields to summarize
*
* @return NamedList containing a NamedList for each document, which in
* turns contains sets (field, summary) pairs.
*/
@SuppressWarnings("unchecked")
public NamedList