![JAR search and dependency download from the Maven repository](/logo.png)
name.fraser.neil.plaintext.StandardBreakScorer Maven / Gradle / Ivy
package name.fraser.neil.plaintext;
import java.util.regex.Pattern;
import static java.lang.Character.CONTROL;
/**
 * Scores how good the boundary between two adjacent pieces of text is as a
 * place to break, based on the last character of the first piece and the
 * first character of the second piece.
 *
 * <p>Scores range from 0 (the boundary falls between two letters/digits) to
 * 5 (one of the pieces is empty, i.e. the boundary is at an edge).
 *
 * <p>This class holds no mutable state; the compiled patterns are shared
 * constants.
 */
public class StandardBreakScorer implements SemanticBreakScorer {

    /** Matches a blank line (LF, optional CR, LF) at the very end of the input. */
    private static final Pattern BLANKLINEEND
            = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL);

    /** Matches a blank line (optional CR, LF, optional CR, LF) at the very start of the input. */
    private static final Pattern BLANKLINESTART
            = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL);

    /**
     * Rates the boundary between {@code one} and {@code two}.
     *
     * <p>The tiers are cumulative; each tier is only considered when the
     * previous one was reached:
     * <ul>
     *   <li>5 - either string is empty (edge of the text);</li>
     *   <li>4 - {@code one} ends with, or {@code two} starts with, a blank line;</li>
     *   <li>3 - either boundary character is a control character (line breaks,
     *       but also e.g. tab, since the test is {@code Character.getType(c) == CONTROL});</li>
     *   <li>2 - either boundary character is whitespace;</li>
     *   <li>1 - either boundary character is not a letter or digit;</li>
     *   <li>0 - both boundary characters are letters or digits.</li>
     * </ul>
     *
     * @param one the text before the boundary; must not be {@code null}
     * @param two the text after the boundary; must not be {@code null}
     * @return a score from 0 (worst) to 5 (best)
     */
    public int scoreBreakOver(String one, String two) {
        if (one.length() == 0 || two.length() == 0) {
            // Edges are the best.
            return 5;
        }

        // Each port of this function behaves slightly differently due to
        // subtle differences in each language's definition of things like
        // 'whitespace'. Since this function's purpose is largely cosmetic,
        // the choice has been made to use each language's native features
        // rather than force total conformity.
        final char endOne = one.charAt(one.length() - 1);
        final char startTwo = two.charAt(0);

        // Zero points: the boundary sits between two alphanumerics.
        if (Character.isLetterOrDigit(endOne) && Character.isLetterOrDigit(startTwo)) {
            return 0;
        }
        // One point for non-alphanumeric.
        if (!Character.isWhitespace(endOne) && !Character.isWhitespace(startTwo)) {
            return 1;
        }
        // Two points for whitespace.
        if (Character.getType(endOne) != CONTROL && Character.getType(startTwo) != CONTROL) {
            return 2;
        }
        // Three points for line breaks, four points for blank lines.
        boolean blankLine = BLANKLINEEND.matcher(one).find()
                || BLANKLINESTART.matcher(two).find();
        return blankLine ? 4 : 3;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy