org.apache.lucene.search.postingshighlight.CustomPassageFormatter Maven / Gradle / Ivy
/*
* Licensed to Elasticsearch under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elasticsearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.lucene.search.postingshighlight;
import org.apache.lucene.search.highlight.Encoder;
import org.elasticsearch.search.highlight.HighlightUtils;
/**
Custom passage formatter that allows us to:
1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet})
2) use the {@link Encoder} implementations that are already used with the other highlighters
*/
public class CustomPassageFormatter extends PassageFormatter {
private final String preTag;
private final String postTag;
private final Encoder encoder;
public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) {
this.preTag = preTag;
this.postTag = postTag;
this.encoder = encoder;
}
@Override
public Snippet[] format(Passage[] passages, String content) {
Snippet[] snippets = new Snippet[passages.length];
int pos;
for (int j = 0; j < passages.length; j++) {
Passage passage = passages[j];
StringBuilder sb = new StringBuilder();
pos = passage.startOffset;
for (int i = 0; i < passage.numMatches; i++) {
int start = passage.matchStarts[i];
int end = passage.matchEnds[i];
// its possible to have overlapping terms
if (start > pos) {
append(sb, content, pos, start);
}
if (end > pos) {
sb.append(preTag);
append(sb, content, Math.max(pos, start), end);
sb.append(postTag);
pos = end;
}
}
// its possible a "term" from the analyzer could span a sentence boundary.
append(sb, content, pos, Math.max(pos, passage.endOffset));
//we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
}
//and we trim the snippets too
snippets[j] = new Snippet(sb.toString().trim(), passage.score, passage.numMatches > 0);
}
return snippets;
}
protected void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy