All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yelp.nrtsearch.server.highlights.HighlightUtils Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta.1
Show newest version
/*
 * Copyright 2022 Yelp Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yelp.nrtsearch.server.highlights;

import com.yelp.nrtsearch.server.grpc.Highlight;
import com.yelp.nrtsearch.server.grpc.Highlight.Settings;
import com.yelp.nrtsearch.server.grpc.Highlight.Type;
import com.yelp.nrtsearch.server.index.IndexState;
import com.yelp.nrtsearch.server.query.QueryNodeMapper;
import com.yelp.nrtsearch.server.utils.StructValueTransformer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner;

/**
 * Helper class to create {@link HighlightSettings} from a search request.
 *
 * <p>Settings are resolved in two layers: a single "global" {@link HighlightSettings} is built
 * from the request-level settings (falling back to the defaults declared in this class), and then
 * every field that carries its own settings in the request gets a copy of the global settings
 * with the explicitly provided values overridden.
 */
public class HighlightUtils {

  // Tags wrapped around each highlighted term when the request does not provide any.
  private static final String[] DEFAULT_PRE_TAGS = new String[] {"<em>"};
  private static final String[] DEFAULT_POST_TAGS = new String[] {"</em>"};
  private static final int DEFAULT_FRAGMENT_SIZE = 100; // In number of characters
  private static final int DEFAULT_MAX_NUM_FRAGMENTS = 5;

  private static final String DEFAULT_HIGHLIGHTER_NAME = NRTFastVectorHighlighter.HIGHLIGHTER_NAME;
  private static final String DEFAULT_FRAGMENTER = "span";
  private static final boolean DEFAULT_SCORE_ORDERED = true;
  private static final boolean DEFAULT_FIELD_MATCH = false;
  private static final boolean DEFAULT_DISCRETE_MULTIVALUE = false;
  private static final char[] DEFAULT_BOUNDARY_CHARS = SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS;
  private static final int DEFAULT_BOUNDARY_MAX_SCAN = SimpleBoundaryScanner.DEFAULT_MAX_SCAN;
  private static final Locale DEFAULT_BOUNDARY_SCANNER_LOCALE = Locale.ROOT;
  private static final QueryNodeMapper QUERY_NODE_MAPPER = QueryNodeMapper.getInstance();

  /**
   * Create the {@link HighlightSettings} for every field that is required to be highlighted.
   * Converts query-level and field-level settings into a single setting for every field.
   *
   * <p>Fields without an entry in the request's field settings map share the same global settings
   * instance; fields with an entry get their own instance in which every value that is explicitly
   * set on the field replaces the global one.
   *
   * @param highlight Highlighting-related information in search request
   * @param searchQuery Compiled Lucene-level query from the search request
   * @param indexState {@link IndexState} for the index
   * @param highlighterService service to provide highlighters for each field
   * @return unmodifiable {@link Map} of field name to its {@link HighlightSettings}
   * @throws UnsupportedOperationException if the plain highlighter is requested
   * @throws IllegalArgumentException if the requested highlighter type is not recognized
   */
  static Map<String, HighlightSettings> createPerFieldSettings(
      Highlight highlight,
      Query searchQuery,
      IndexState indexState,
      HighlighterService highlighterService) {
    HighlightSettings globalSettings =
        createGlobalFieldSettings(indexState, searchQuery, highlight, highlighterService);
    Map<String, Settings> fieldSettingsFromRequest = highlight.getFieldSettingsMap();
    Map<String, HighlightSettings> fieldSettings = new HashMap<>();
    for (String field : highlight.getFieldsList()) {
      if (!fieldSettingsFromRequest.containsKey(field)) {
        // No field-level override: the field simply uses the global settings.
        fieldSettings.put(field, globalSettings);
        continue;
      }
      fieldSettings.put(
          field,
          createFieldSettings(
              fieldSettingsFromRequest.get(field), globalSettings, indexState, highlighterService));
    }
    return Collections.unmodifiableMap(fieldSettings);
  }

  /**
   * Merge the field-level settings from the request on top of the global settings.
   *
   * @param settings field-level settings from the request
   * @param globalSettings resolved global settings used for every value the field does not set
   * @param indexState {@link IndexState} for the index, used to compile a field highlight query
   * @param highlighterService service to provide the highlighter when the field overrides it
   * @return settings for the field with all overrides applied
   */
  private static HighlightSettings createFieldSettings(
      Settings settings,
      HighlightSettings globalSettings,
      IndexState indexState,
      HighlighterService highlighterService) {
    HighlightSettings.Builder builder =
        new HighlightSettings.Builder()
            .withHighlighter(
                // DEFAULT in field override has a different meaning: no override
                settings.getHighlighterType() == Type.DEFAULT
                    ? globalSettings.getHighlighter()
                    : highlighterService.getHighlighter(resolveHighlighterName(settings)))
            .withPreTags(
                !settings.getPreTagsList().isEmpty()
                    ? settings.getPreTagsList().toArray(new String[0])
                    : globalSettings.getPreTags())
            .withPostTags(
                !settings.getPostTagsList().isEmpty()
                    ? settings.getPostTagsList().toArray(new String[0])
                    : globalSettings.getPostTags())
            .withFragmentSize(
                settings.hasFragmentSize()
                    ? settings.getFragmentSize().getValue()
                    : globalSettings.getFragmentSize())
            .withHighlightQuery(
                settings.hasHighlightQuery()
                    ? QUERY_NODE_MAPPER.getQuery(settings.getHighlightQuery(), indexState)
                    : globalSettings.getHighlightQuery())
            .withScoreOrdered(
                settings.hasScoreOrdered()
                    ? settings.getScoreOrdered().getValue()
                    : globalSettings.isScoreOrdered())
            .withFragmenter(
                settings.hasFragmenter()
                    ? settings.getFragmenter().getValue()
                    : globalSettings.getFragmenter())
            .withFieldMatch(
                settings.hasFieldMatch()
                    ? settings.getFieldMatch().getValue()
                    : globalSettings.getFieldMatch())
            .withDiscreteMultivalue(
                settings.hasDiscreteMultivalue()
                    ? settings.getDiscreteMultivalue().getValue()
                    : globalSettings.getDiscreteMultivalue())
            .withBoundaryScanner(
                settings.hasBoundaryScanner()
                    ? settings.getBoundaryScanner().getValue()
                    : globalSettings.getBoundaryScanner())
            .withBoundaryChars(
                // An empty string is treated the same as "not set"
                settings.hasBoundaryChars() && !settings.getBoundaryChars().getValue().isEmpty()
                    ? settings.getBoundaryChars().getValue().toCharArray()
                    : globalSettings.getBoundaryChars())
            .withBoundaryMaxScan(
                settings.hasBoundaryMaxScan()
                    ? settings.getBoundaryMaxScan().getValue()
                    : globalSettings.getBoundaryMaxScan())
            .withBoundaryScannerLocale(
                settings.hasBoundaryScannerLocale()
                    ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue())
                    : globalSettings.getBoundaryScannerLocale())
            .withCustomHighlighterParams(
                settings.hasCustomHighlighterParams()
                    ? StructValueTransformer.transformStruct(settings.getCustomHighlighterParams())
                    : globalSettings.getCustomHighlighterParams());

    if (!settings.hasMaxNumberOfFragments()) {
      builder.withMaxNumFragments(globalSettings.getMaxNumFragments());
    } else if (settings.getMaxNumberOfFragments().getValue() == 0) {
      // Zero fragments also lifts the fragment size limit, replacing the size chosen above.
      // NOTE(review): presumably this makes the highlighter return the whole field value;
      // confirm against the highlighter implementation.
      builder.withMaxNumFragments(0).withFragmentSize(Integer.MAX_VALUE);
    } else {
      builder.withMaxNumFragments(settings.getMaxNumberOfFragments().getValue());
    }
    return builder.build();
  }

  /**
   * Build the settings shared by all fields from the request-level settings, using the defaults
   * declared in this class for every value the request leaves unset.
   *
   * @param indexState {@link IndexState} for the index, used to compile a highlight query
   * @param searchQuery query used for highlighting when the request has no highlight query
   * @param highlight Highlighting-related information in search request
   * @param highlighterService service to provide the highlighter
   * @return resolved global settings
   */
  private static HighlightSettings createGlobalFieldSettings(
      IndexState indexState,
      Query searchQuery,
      Highlight highlight,
      HighlighterService highlighterService) {
    Settings settings = highlight.getSettings();

    HighlightSettings.Builder builder = new HighlightSettings.Builder();

    // NOTE(review): unlike the field-level override, a max_number_of_fragments of 0 here does
    // not change the fragment size. Confirm whether that asymmetry is intended.
    builder
        .withHighlighter(highlighterService.getHighlighter(resolveHighlighterName(settings)))
        .withPreTags(
            settings.getPreTagsList().isEmpty()
                ? DEFAULT_PRE_TAGS
                : settings.getPreTagsList().toArray(new String[0]))
        .withPostTags(
            settings.getPostTagsList().isEmpty()
                ? DEFAULT_POST_TAGS
                : settings.getPostTagsList().toArray(new String[0]))
        .withScoreOrdered(
            settings.hasScoreOrdered()
                ? settings.getScoreOrdered().getValue()
                : DEFAULT_SCORE_ORDERED)
        .withFragmenter(
            settings.hasFragmenter() ? settings.getFragmenter().getValue() : DEFAULT_FRAGMENTER)
        .withFieldMatch(
            settings.hasFieldMatch() ? settings.getFieldMatch().getValue() : DEFAULT_FIELD_MATCH)
        .withDiscreteMultivalue(
            settings.hasDiscreteMultivalue()
                ? settings.getDiscreteMultivalue().getValue()
                : DEFAULT_DISCRETE_MULTIVALUE)
        .withMaxNumFragments(
            settings.hasMaxNumberOfFragments()
                ? settings.getMaxNumberOfFragments().getValue()
                : DEFAULT_MAX_NUM_FRAGMENTS)
        .withFragmentSize(
            settings.hasFragmentSize()
                ? settings.getFragmentSize().getValue()
                : DEFAULT_FRAGMENT_SIZE)
        .withBoundaryScanner(
            // There is no default boundary scanner; null is passed through when unset
            settings.hasBoundaryScanner() ? settings.getBoundaryScanner().getValue() : null)
        .withBoundaryChars(
            // An empty string is treated the same as "not set"
            settings.hasBoundaryChars() && !settings.getBoundaryChars().getValue().isEmpty()
                ? settings.getBoundaryChars().getValue().toCharArray()
                : DEFAULT_BOUNDARY_CHARS)
        .withBoundaryMaxScan(
            settings.hasBoundaryMaxScan()
                ? settings.getBoundaryMaxScan().getValue()
                : DEFAULT_BOUNDARY_MAX_SCAN)
        .withBoundaryScannerLocale(
            settings.hasBoundaryScannerLocale()
                ? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue())
                : DEFAULT_BOUNDARY_SCANNER_LOCALE)
        .withCustomHighlighterParams(
            settings.hasCustomHighlighterParams()
                ? StructValueTransformer.transformStruct(settings.getCustomHighlighterParams())
                : Collections.emptyMap());

    // Highlight with the search query itself unless a dedicated highlight query is provided.
    Query query =
        settings.hasHighlightQuery()
            ? QUERY_NODE_MAPPER.getQuery(settings.getHighlightQuery(), indexState)
            : searchQuery;
    builder.withHighlightQuery(query);

    return builder.build();
  }

  /**
   * Map the highlighter type in the given settings to the name of the highlighter to look up.
   *
   * @param settings request-level or field-level settings
   * @return name of the highlighter to use
   * @throws UnsupportedOperationException if the plain highlighter is requested
   * @throws IllegalArgumentException if the highlighter type is not recognized
   */
  private static String resolveHighlighterName(Settings settings) {
    switch (settings.getHighlighterType()) {
      case DEFAULT:
        /* default -> default-highlighter is only applicable in global settings.
         * In field override, we should return the one in global settings instead.
         */
        return DEFAULT_HIGHLIGHTER_NAME;
      case PLAIN:
        throw new UnsupportedOperationException("plain-highlighter is not supported yet.");
      case FAST_VECTOR:
        return NRTFastVectorHighlighter.HIGHLIGHTER_NAME;
      case CUSTOM:
        return settings.getCustomHighlighterName();
      default:
        throw new IllegalArgumentException(
            String.format("Unknown highlighter_type: %s", settings.getHighlighterType()));
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy