All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2 Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
package org.elasticsearch.common.lucene.search.vectorhighlight;

import gnu.trove.set.hash.TCharHashSet;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;

/**
 * A copy of Lucene {@link org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner}.
 * 

* Uses specialized char set to lookup boundary, and fixes a problem with start offset in the * beginning of the text: https://issues.apache.org/jira/browse/LUCENE-3697 (which has a problem * with multiple empty fields to highlight...). */ public class SimpleBoundaryScanner2 implements BoundaryScanner { public static final int DEFAULT_MAX_SCAN = 20; public static final char[] DEFAULT_BOUNDARY_CHARS = {'.', ',', '!', '?', ' ', '\t', '\n'}; public static final SimpleBoundaryScanner2 DEFAULT = new SimpleBoundaryScanner2(); public int maxScan; public TCharHashSet boundaryChars; public SimpleBoundaryScanner2() { this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS); } public SimpleBoundaryScanner2(int maxScan, char[] boundaryChars) { this.maxScan = maxScan; this.boundaryChars = new TCharHashSet(boundaryChars); } public int findStartOffset(StringBuilder buffer, int start) { // avoid illegal start offset if (start > buffer.length() || start < 1) return start; int offset, count = maxScan; for (offset = start; offset > 0 && count > 0; count--) { // found? if (boundaryChars.contains(buffer.charAt(offset - 1))) return offset; offset--; } // LUCENE-3697 if (offset == 0) { return 0; } // not found return start; } public int findEndOffset(StringBuilder buffer, int start) { // avoid illegal start offset if (start > buffer.length() || start < 0) return start; int offset, count = maxScan; //for( offset = start; offset <= buffer.length() && count > 0; count-- ){ for (offset = start; offset < buffer.length() && count > 0; count--) { // found? if (boundaryChars.contains(buffer.charAt(offset))) return offset; offset++; } // not found return start; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy