All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.postingshighlight.CustomPassageFormatter Maven / Gradle / Ivy

There is a newer version: 8.15.1
Show newest version
/*
 * Licensed to Elasticsearch under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elasticsearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.lucene.search.postingshighlight;

import org.apache.lucene.search.highlight.Encoder;
import org.elasticsearch.search.highlight.HighlightUtils;

/**
Custom passage formatter that allows us to:
1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet})
2) use the {@link Encoder} implementations that are already used with the other highlighters
 */
public class CustomPassageFormatter extends PassageFormatter {

    private final String preTag;
    private final String postTag;
    private final Encoder encoder;

    public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) {
        this.preTag = preTag;
        this.postTag = postTag;
        this.encoder = encoder;
    }

    @Override
    public Snippet[] format(Passage[] passages, String content) {
        Snippet[] snippets = new Snippet[passages.length];
        int pos;
        for (int j = 0; j < passages.length; j++) {
            Passage passage = passages[j];
            StringBuilder sb = new StringBuilder();
            pos = passage.startOffset;
            for (int i = 0; i < passage.numMatches; i++) {
                int start = passage.matchStarts[i];
                int end = passage.matchEnds[i];
                // its possible to have overlapping terms
                if (start > pos) {
                    append(sb, content, pos, start);
                }
                if (end > pos) {
                    sb.append(preTag);
                    append(sb, content, Math.max(pos, start), end);
                    sb.append(postTag);
                    pos = end;
                }
            }
            // its possible a "term" from the analyzer could span a sentence boundary.
            append(sb, content, pos, Math.max(pos, passage.endOffset));
            //we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
            if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
                sb.deleteCharAt(sb.length() - 1);
            }
            //and we trim the snippets too
            snippets[j] = new Snippet(sb.toString().trim(), passage.score, passage.numMatches > 0);
        }
        return snippets;
    }

    protected void append(StringBuilder dest, String content, int start, int end) {
        dest.append(encoder.encodeText(content.substring(start, end)));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy