org.apache.lucene.search.uhighlight.CustomPassageFormatter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch - Open Source, Distributed, RESTful Search Engine
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search.uhighlight;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
/**
* Custom passage formatter that allows us to:
* 1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet})
* 2) use the {@link Encoder} implementations that are already used with the other highlighters
*/
public class CustomPassageFormatter extends PassageFormatter {
private final String preTag;
private final String postTag;
private final Encoder encoder;
public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) {
this.preTag = preTag;
this.postTag = postTag;
this.encoder = encoder;
}
@Override
public Snippet[] format(Passage[] passages, String content) {
Snippet[] snippets = new Snippet[passages.length];
int pos;
for (int j = 0; j < passages.length; j++) {
Passage passage = passages[j];
StringBuilder sb = new StringBuilder();
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
}
if (end > pos) {
sb.append(preTag);
append(sb, content, Math.max(pos, start), end);
sb.append(postTag);
pos = end;
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
//we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
} else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
}
//and we trim the snippets too
snippets[j] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0);
}
return snippets;
}
private void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy