Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.OffsetLimitTokenFilter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* @since solr 1.3
*/
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{
/**
* This constant was formerly part of HighlightParams. After deprecation it was removed so clients
* would no longer use it, but we still support it server side.
*/
private static final String USE_FVH = HighlightParams.HIGHLIGHT + ".useFastVectorHighlighter";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected final SolrCore solrCore;
//Will be invoked via reflection
public DefaultSolrHighlighter(SolrCore solrCore) {
this.solrCore = solrCore;
}
// Thread safe registry
protected final Map formatters =
new HashMap<>();
// Thread safe registry
protected final Map encoders =
new HashMap<>();
// Thread safe registry
protected final Map fragmenters =
new HashMap<>() ;
// Thread safe registry
protected final Map fragListBuilders =
new HashMap<>() ;
// Thread safe registry
protected final Map fragmentsBuilders =
new HashMap<>() ;
// Thread safe registry
protected final Map boundaryScanners =
new HashMap<>() ;
@Override
public void init(PluginInfo info) {
formatters.clear();
encoders.clear();
fragmenters.clear();
fragListBuilders.clear();
fragmentsBuilders.clear();
boundaryScanners.clear();
// Load the fragmenters
SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null);
if (frag == null) {
frag = new GapFragmenter();
solrCore.initDefaultPlugin(frag, SolrFragmenter.class);
}
fragmenters.put("", frag);
fragmenters.put(null, frag);
// Load the formatters
SolrFormatter fmt = solrCore.initPlugins(info.getChildren("formatter"), formatters,SolrFormatter.class,null);
if (fmt == null) {
fmt = new HtmlFormatter();
solrCore.initDefaultPlugin(fmt, SolrFormatter.class);
}
formatters.put("", fmt);
formatters.put(null, fmt);
// Load the encoders
SolrEncoder enc = solrCore.initPlugins(info.getChildren("encoder"), encoders,SolrEncoder.class,null);
if (enc == null) {
enc = new DefaultEncoder();
solrCore.initDefaultPlugin(enc, SolrEncoder.class);
}
encoders.put("", enc);
encoders.put(null, enc);
// Load the FragListBuilders
SolrFragListBuilder fragListBuilder = solrCore.initPlugins(info.getChildren("fragListBuilder"),
fragListBuilders, SolrFragListBuilder.class, null );
if( fragListBuilder == null ) {
fragListBuilder = new SimpleFragListBuilder();
solrCore.initDefaultPlugin(fragListBuilder, SolrFragListBuilder.class);
}
fragListBuilders.put( "", fragListBuilder );
fragListBuilders.put( null, fragListBuilder );
// Load the FragmentsBuilders
SolrFragmentsBuilder fragsBuilder = solrCore.initPlugins(info.getChildren("fragmentsBuilder"),
fragmentsBuilders, SolrFragmentsBuilder.class, null);
if( fragsBuilder == null ) {
fragsBuilder = new ScoreOrderFragmentsBuilder();
solrCore.initDefaultPlugin(fragsBuilder, SolrFragmentsBuilder.class);
}
fragmentsBuilders.put( "", fragsBuilder );
fragmentsBuilders.put( null, fragsBuilder );
// Load the BoundaryScanners
SolrBoundaryScanner boundaryScanner = solrCore.initPlugins(info.getChildren("boundaryScanner"),
boundaryScanners, SolrBoundaryScanner.class, null);
if(boundaryScanner == null) {
boundaryScanner = new SimpleBoundaryScanner();
solrCore.initDefaultPlugin(boundaryScanner, SolrBoundaryScanner.class);
}
boundaryScanners.put("", boundaryScanner);
boundaryScanners.put(null, boundaryScanner);
}
/**
* Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
* @param query The current Query
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
* @param tokenStream document text tokenStream that implements reset() efficiently (e.g. CachingTokenFilter).
* If it's used, call reset() first.
* @throws IOException If there is a low-level I/O error.
*/
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, TokenStream tokenStream) throws IOException {
SolrParams params = request.getParams();
Highlighter highlighter = new Highlighter(
getFormatter(fieldName, params),
getEncoder(fieldName, params),
getSpanQueryScorer(query, fieldName, tokenStream, request));
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
/**
* Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
* @param query The current Query
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
*/
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
SolrParams params = request.getParams();
Highlighter highlighter = new Highlighter(
getFormatter(fieldName, params),
getEncoder(fieldName, params),
getQueryScorer(query, fieldName, request));
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
/**
* Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable for this Query and field.
* @param query The current query
* @param tokenStream document text tokenStream that implements reset() efficiently (e.g. CachingTokenFilter).
* If it's used, call reset() first.
* @param fieldName The name of the field
* @param request The SolrQueryRequest
*/
protected QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) {
QueryScorer scorer = new QueryScorer(query,
request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false) ? fieldName : null) {
@Override
protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
return new CustomSpanTermExtractor(defaultField);
}
};
scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
boolean defaultPayloads = true;//overwritten below
try {
// It'd be nice to know if payloads are on the tokenStream but the presence of the attribute isn't a good
// indicator.
final Terms terms = request.getSearcher().getSlowAtomicReader().terms(fieldName);
if (terms != null) {
defaultPayloads = terms.hasPayloads();
}
} catch (IOException e) {
log.error("Couldn't check for existence of payloads", e);
}
scorer.setUsePayloads(request.getParams().getFieldBool(fieldName, HighlightParams.PAYLOADS, defaultPayloads));
return scorer;
}
private static class CustomSpanTermExtractor extends WeightedSpanTermExtractor {
public CustomSpanTermExtractor(String defaultField) {
super(defaultField);
}
@Override
protected void extract(Query query, float boost, Map terms) throws IOException {
// these queries are not supported in lucene highlighting out of the box since 8.0
if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof ToChildBlockJoinQuery) {
extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else {
super.extract(query, boost, terms);
}
}
}
/**
* Return a {@link org.apache.lucene.search.highlight.Scorer} suitable for this Query and field.
* @param query The current query
* @param fieldName The name of the field
* @param request The SolrQueryRequest
*/
protected Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
if (reqFieldMatch) {
return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
} else {
return new QueryTermScorer(query);
}
}
/**
* Return the max number of snippets for this field. If this has not
* been configured for this field, fall back to the configured default
* or the solr default.
* @param fieldName The name of the field
* @param params The params controlling Highlighting
*/
protected int getMaxSnippets(String fieldName, SolrParams params) {
return params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1);
}
/**
* Return whether adjacent fragments should be merged.
* @param fieldName The name of the field
* @param params The params controlling Highlighting
*/
protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
}
/**
* Return a {@link org.apache.lucene.search.highlight.Formatter} appropriate for this field. If a formatter
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link org.apache.lucene.search.highlight.SimpleHTMLFormatter}).
*
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Formatter}.
*/
protected Formatter getFormatter(String fieldName, SolrParams params )
{
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
SolrFormatter formatter = formatters.get(str);
if( formatter == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
}
return formatter.getFormatter(fieldName, params);
}
/**
* Return an {@link org.apache.lucene.search.highlight.Encoder} appropriate for this field. If an encoder
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link org.apache.lucene.search.highlight.DefaultEncoder}).
*
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Encoder}.
*/
protected Encoder getEncoder(String fieldName, SolrParams params){
String str = params.getFieldParam( fieldName, HighlightParams.ENCODER );
SolrEncoder encoder = encoders.get( str );
if( encoder == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown encoder: "+str );
}
return encoder.getEncoder(fieldName, params);
}
/**
* Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link GapFragmenter}).
*
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Fragmenter}.
*/
protected Fragmenter getFragmenter(String fieldName, SolrParams params)
{
String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
SolrFragmenter frag = fragmenters.get(fmt);
if( frag == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
}
return frag.getFragmenter(fieldName, params);
}
protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
SolrFragListBuilder solrFlb = fragListBuilders.get(flb);
if( solrFlb == null ){
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + flb );
}
return solrFlb.getFragListBuilder(params);
}
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder(params, bs);
}
protected SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
SolrFragmentsBuilder solrFb = fragmentsBuilders.get(fb);
if( solrFb == null ){
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
}
return solrFb;
}
protected BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
if(solrBs == null){
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown boundaryScanner: " + bs);
}
return solrBs.getBoundaryScanner(fieldName, params);
}
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.
*
* @param docs query results
* @param query the query
* @param req the current request
* @param defaultFields default list of fields to summarize
*
* @return NamedList containing a NamedList for each document, which in
* turns contains sets (field, summary) pairs.
*/
@Override
@SuppressWarnings("unchecked")
public NamedList