org.apache.solr.highlight.UnifiedSolrHighlighter Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.highlight;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.search.uhighlight.WholeBreakIterator;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.plugin.PluginInfoInitialized;

/**
 * Highlighter impl that uses {@link UnifiedHighlighter}
 *
 * <p>Example configuration with default values:
 *
 * <pre>{@code
 * <requestHandler name="/select" class="solr.SearchHandler">
 * <lst name="defaults">
 * <str name="hl.method">unified</str>
 * <int name="hl.snippets">1</int>
 * <str name="hl.tag.pre">&lt;em&gt;</str>
 * <str name="hl.tag.post">&lt;/em&gt;</str>
 * <str name="hl.simple.pre">&lt;em&gt;</str>
 * <str name="hl.simple.post">&lt;/em&gt;</str>
 * <str name="hl.tag.ellipsis">(internal/unspecified)</str>
 * <bool name="hl.defaultSummary">false</bool>
 * <str name="hl.encoder">simple</str>
 * <float name="hl.score.k1">1.2</float>
 * <float name="hl.score.b">0.75</float>
 * <float name="hl.score.pivot">87</float>
 * <str name="hl.bs.language"></str>
 * <str name="hl.bs.country"></str>
 * <str name="hl.bs.variant"></str>
 * <str name="hl.bs.type">SENTENCE</str>
 * <int name="hl.maxAnalyzedChars">51200</int>
 * <bool name="hl.highlightMultiTerm">true</bool>
 * <bool name="hl.usePhraseHighlighter">true</bool>
 * <int name="hl.cacheFieldValCharsThreshold">524288</int>
 * <str name="hl.offsetSource"></str>
 * <bool name="hl.weightMatches">true</bool>
 * </lst>
 * </requestHandler>
 * }</pre>
 *
 * <p>Notes:
 *
 * <ul>
 *   <li>hl.q (string) can specify the query
 *   <li>hl.fl (string) specifies the field list.
 *   <li>hl.snippets (int) specifies how many snippets to return.
 *   <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
 *   <li>hl.tag.post (string) specifies text which appears after a highlighted term.
 *   <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer
 *       hl.tag.pre)
 *   <li>hl.simple.post (string) specifies text which appears after a highlighted term. (prefer
 *       hl.tag.post)
 *   <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is
 *       to retain each value in a list without joining them.
 *   <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading
 *       text.
 *   <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
 *   <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
 *   <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
 *   <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
 *   <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD,
 *       CHAR, WHOLE]
 *   <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string
 *       (root locale)
 *   <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string
 *       (root locale)
 *   <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string
 *       (root locale)
 *   <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a
 *       document for any one field.
 *   <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries
 *       at some cost. default is true
 *   <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
 *   <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached.
 *       default is 524288 (1MB in 2 byte chars)
 *   <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will
 *       use what's available if not specified
 *   <li>hl.weightMatches (bool) enables Lucene Weight Matches mode
 * </ul>
 *
 * @lucene.experimental
 */
public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {

  /**
   * A one-character string holding the NUL character (U+0000).
   *
   * <p>NOTE(review): presumably an internal delimiter placed between snippets when no
   * hl.tag.ellipsis is supplied, so they can be split apart again later; confirm against the
   * usages elsewhere in this class.
   */
  protected static final String SNIPPET_SEPARATOR = "\u0000";

  /**
   * Initialization callback taking the plugin's {@link PluginInfo}; deliberately a no-op.
   *
   * @param info the plugin configuration passed in at initialization; it is not read here
   */
  @Override
  public void init(PluginInfo info) {
    // Intentionally empty: nothing is read from the supplied plugin info.
  }

  @Override
  public NamedList