com.floreysoft.jmte.util.MiniParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jmte Show documentation
Show all versions of jmte Show documentation
To build and locally install jar, javadoc and sources, please use:
mvn clean javadoc:jar source:jar install -Dmaven.test.skip=true
Tested on Maven 2.0.9, JDK 1.7
package com.floreysoft.jmte.util;
import java.util.ArrayList;
import java.util.List;
/**
 * Parser for embedded mini languages.
 * <ul>
 * <li>Solves demarcation: where does an embedded language begin and where
 * does it end
 * <ul>
 * <li>Escaping</li>
 * <li>Quotation</li>
 * <li>Graceful reaction to and recovery from invalid input</li>
 * </ul>
 * </li>
 * <li>Lays the ground for common patterns of mini language processing
 * <ul>
 * <li>all kinds of nested brackets</li>
 * <li>segmentation of data</li>
 * <li>not losing context</li>
 * <li>context-sensitive parsing, a.k.a. lexer modes/states</li>
 * </ul>
 * </li>
 * </ul>
 * Not thread safe.
 *
 * @author olli
 */
public final class MiniParser {

    /** Escape character used by the pre-configured instances: a backslash. */
    public static final char DEFAULT_ESCAPE_CHAR = '\\';

    /** Quote character used by the pre-configured instances: a double quote. */
    public static final char DEFAULT_QUOTE_CHAR = '"';

    /** Placeholder value passed where no real character is wanted. */
    private static final char NO_CHAR = (char) -1;

    /** @return a parser with default escape/quote chars, case sensitive, no trimming, no raw output */
    public static MiniParser defaultInstance() {
        return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
                false, false);
    }

    /** @return like {@link #defaultInstance()}, but every produced segment is trimmed */
    public static MiniParser trimmedInstance() {
        return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
                true, false);
    }

    /** @return like {@link #defaultInstance()}, but string matching ignores case */
    public static MiniParser ignoreCaseInstance() {
        return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, true,
                false, false);
    }

    /**
     * @return a parser in raw output mode whose escape and quote characters
     *         are both set to the placeholder {@code (char) -1}
     */
    public static MiniParser fullRawInstance() {
        return new MiniParser(NO_CHAR, NO_CHAR, false, false, true);
    }

    /**
     * @return a parser that honors the default escape/quote chars but keeps
     *         them in the output
     */
    public static MiniParser rawOutputInstance() {
        return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
                false, true);
    }

    private final char escapeChar;
    private final char quoteChar;
    private final boolean ignoreCase;
    private final boolean trim;
    private final boolean rawOutput;

    // Per-call working state. Every public operation resets it when it is
    // done, so one call can never influence the next one.
    private boolean escaped = false;
    private boolean quoted = false;

    /**
     * Creates a parser.
     *
     * @param escapeChar character that escapes the character following it
     * @param quoteChar  character that toggles quotation
     * @param ignoreCase whether string matching ignores case
     * @param trim       whether produced segments are trimmed
     * @param rawOutput  whether escape and quote characters are kept in the
     *                   output instead of being dropped
     */
    public MiniParser(final char escapeChar, final char quoteChar,
            final boolean ignoreCase, final boolean trim,
            final boolean rawOutput) {
        this.escapeChar = escapeChar;
        this.quoteChar = quoteChar;
        this.ignoreCase = ignoreCase;
        this.trim = trim;
        this.rawOutput = rawOutput;
    }

    /**
     * Replaces every occurrence of {@code oldString} by {@code newString}.
     * All characters that are not part of an occurrence are run through the
     * escape/quote processing. Occurrences are replaced regardless of
     * whether they are escaped or quoted.
     *
     * @param input     text to process; {@code null} yields {@code null}
     * @param oldString text to look for; if {@code null} or empty the input
     *                  is returned unchanged
     * @param newString replacement; {@code null} is treated as empty string
     * @return the processed text
     */
    public String replace(final String input, final String oldString,
            final String newString) {
        if (input == null) {
            return null;
        }
        if (oldString == null || oldString.length() == 0) {
            return input;
        }
        final String replacement = newString == null ? "" : newString;
        try {
            final StringBuilder buffer = new StringBuilder(input.length());
            int index = 0;
            while (index < input.length()) {
                if (matchesAt(input, index, oldString)) {
                    buffer.append(replacement);
                    index += oldString.length();
                } else {
                    append(buffer, input.charAt(index));
                    index++;
                }
            }
            return buffer.toString();
        } finally {
            resetState();
        }
    }

    /**
     * Splits on a single separator character without a segment limit.
     *
     * @see #split(String, char, int)
     */
    public List<String> split(final String input, final char separator) {
        return split(input, separator, Integer.MAX_VALUE);
    }

    /**
     * Splits the input at every unescaped, unquoted {@code separator}.
     *
     * @param input       text to split; {@code null} yields {@code null}
     * @param separator   separator character
     * @param maxSegments once {@code maxSegments - 1} segments are complete,
     *                    the rest of the input goes into the last segment
     * @return the segments
     */
    public List<String> split(final String input, final char separator,
            final int maxSegments) {
        return splitInternal(input, false, separator, null, maxSegments);
    }

    /**
     * Splits on any character of {@code separatorSet} without a segment
     * limit.
     *
     * @see #split(String, String, int)
     */
    public List<String> split(final String input, final String separatorSet) {
        return split(input, separatorSet, Integer.MAX_VALUE);
    }

    /**
     * Splits the input at every unescaped, unquoted character contained in
     * {@code separatorSet}.
     *
     * @param input        text to split; {@code null} yields {@code null}
     * @param separatorSet characters that each act as a separator
     * @param maxSegments  once {@code maxSegments - 1} segments are
     *                     complete, the rest of the input goes into the last
     *                     segment
     * @return the segments
     */
    public List<String> split(final String input, final String separatorSet,
            final int maxSegments) {
        return splitInternal(input, false, NO_CHAR, separatorSet, maxSegments);
    }

    /**
     * Splits the input at runs of unescaped, unquoted whitespace. A run of
     * several whitespace characters counts as one separator. Whitespace that
     * does not separate (quoted, escaped, or inside the last segment) is
     * kept as is.
     *
     * @param input       text to split; {@code null} yields {@code null}
     * @param maxSegments once {@code maxSegments - 1} segments are complete,
     *                    the rest of the input goes into the last segment
     * @return the segments; an empty trailing segment is not reported
     */
    public List<String> splitOnWhitespace(final String input,
            final int maxSegments) {
        return splitInternal(input, true, NO_CHAR, null, maxSegments);
    }

    /**
     * Splits on whitespace without a segment limit.
     *
     * @see #splitOnWhitespace(String, int)
     */
    public List<String> splitOnWhitespace(final String input) {
        return splitOnWhitespace(input, Integer.MAX_VALUE);
    }

    // Common implementation for single char separator, separator set and
    // whitespace splitting. Sharing the code showed no measurable
    // performance penalty in micro benchmarks.
    private List<String> splitInternal(final String input,
            final boolean splitOnWhitespace, final char separator,
            final String separatorSet, final int maxSegments) {
        if (input == null) {
            return null;
        }
        try {
            final List<String> segments = new ArrayList<String>();
            final int length = input.length();
            StringBuilder buffer = new StringBuilder();
            for (int index = 0; index < length; index++) {
                final char c = input.charAt(index);
                final boolean whitespace = splitOnWhitespace
                        && Character.isWhitespace(c);
                final boolean separates = whitespace
                        || (separatorSet != null ? separatorSet.indexOf(c) != -1
                                : c == separator);
                // in case we are not already in the last segment and there
                // is an unescaped, unquoted separator, this segment is done
                if (separates && segments.size() != maxSegments - 1
                        && !isEscaped()) {
                    finish(segments, buffer);
                    buffer = new StringBuilder();
                    if (whitespace) {
                        // The rest of the whitespace run belongs to the same
                        // separator. It is only swallowed after the decision
                        // to split, so non-separating whitespace is never
                        // lost.
                        while (index + 1 < length && Character
                                .isWhitespace(input.charAt(index + 1))) {
                            index++;
                        }
                    }
                } else {
                    append(buffer, c);
                }
            }
            if (!splitOnWhitespace || buffer.length() != 0) {
                finish(segments, buffer);
            }
            return segments;
        } finally {
            resetState();
        }
    }

    // Adds the buffered text as a new segment, trimmed if configured.
    private void finish(final List<String> segments,
            final StringBuilder buffer) {
        final String segment = buffer.toString();
        segments.add(trim ? segment.trim() : segment);
    }

    /**
     * Finds the last unescaped, unquoted occurrence of {@code substring}.
     *
     * @return the start index of the occurrence, or -1 if there is none or
     *         an argument is {@code null}
     */
    public int lastIndexOf(final String input, final String substring) {
        return indexOfInternal(input, substring, true);
    }

    /**
     * Finds the first unescaped, unquoted occurrence of {@code substring}.
     *
     * @return the start index of the occurrence, or -1 if there is none or
     *         an argument is {@code null}
     */
    public int indexOf(final String input, final String substring) {
        return indexOfInternal(input, substring, false);
    }

    private int indexOfInternal(final String input, final String substring,
            final boolean last) {
        if (input == null || substring == null) {
            return -1;
        }
        try {
            int resultIndex = -1;
            for (int index = 0; index < input.length(); index++) {
                if (!isEscaped() && input.regionMatches(ignoreCase, index,
                        substring, 0, substring.length())) {
                    resultIndex = index;
                    if (!last) {
                        break;
                    }
                }
                // keep the escape/quote state in sync with the position,
                // otherwise the isEscaped() check above could never trigger
                advance(input.charAt(index));
            }
            return resultIndex;
        } finally {
            resetState();
        }
    }

    /**
     * Non-greedy scan.
     *
     * @see #scan(String, String, String, boolean)
     */
    public List<String> scan(final String input, final String splitStart,
            final String splitEnd) {
        return scan(input, splitStart, splitEnd, false);
    }

    /**
     * Greedy scan.
     *
     * @see #scan(String, String, String, boolean)
     */
    public List<String> greedyScan(final String input, final String splitStart,
            final String splitEnd) {
        return scan(input, splitStart, splitEnd, true);
    }

    /**
     * Cuts the input into segments alternating between text outside and
     * text inside of {@code splitStart} ... {@code splitEnd}. Escaped or
     * quoted delimiters are not recognized. A {@code null} or empty
     * delimiter never matches.
     *
     * @param input      text to scan; {@code null} yields {@code null}
     * @param splitStart delimiter that opens an inner segment
     * @param splitEnd   delimiter that closes an inner segment
     * @param greedy     if set, an inner segment is only closed by the last
     *                   unescaped, unquoted {@code splitEnd} of the input
     * @return the segments; a non-empty rest after the last delimiter is
     *         added as final segment
     */
    public List<String> scan(final String input, final String splitStart,
            final String splitEnd, boolean greedy) {
        if (input == null) {
            return null;
        }
        try {
            final List<String> segments = new ArrayList<String>();
            StringBuilder buffer = new StringBuilder();
            boolean started = false;
            final int lastIndexOfEnd = greedy ? lastIndexOf(input, splitEnd)
                    : -1;
            int index = 0;
            while (index < input.length()) {
                final boolean greedyCond = !started || !greedy
                        || index == lastIndexOfEnd;
                final String separator = started ? splitEnd : splitStart;
                if (greedyCond && !isEscaped()
                        && matchesAt(input, index, separator)) {
                    finish(segments, buffer);
                    buffer = new StringBuilder();
                    started = !started;
                    // matchesAt guarantees a non-empty separator, so the
                    // loop always makes progress
                    index += separator.length();
                } else {
                    append(buffer, input.charAt(index));
                    index++;
                }
            }
            // add trailing element to result
            if (buffer.length() != 0) {
                finish(segments, buffer);
            }
            return segments;
        } finally {
            resetState();
        }
    }

    /**
     * Runs every character of the input through the escape/quote
     * processing.
     *
     * @param input text to process; {@code null} yields {@code null}
     * @return the processed text
     */
    public String unescape(final String input) {
        if (input == null) {
            return null;
        }
        try {
            final StringBuilder unescaped = new StringBuilder(input.length());
            for (int i = 0; i < input.length(); i++) {
                append(unescaped, input.charAt(i));
            }
            return unescaped.toString();
        } finally {
            // an unbalanced quote or a trailing escape character must not
            // leak into the next call
            resetState();
        }
    }

    // Whether the given non-empty token occurs in input at index, honoring
    // the ignoreCase setting. A null or empty token never matches.
    private boolean matchesAt(final String input, final int index,
            final String token) {
        return token != null && token.length() != 0 && input
                .regionMatches(ignoreCase, index, token, 0, token.length());
    }

    // The heart of it all: appends c unless it is a functional escape or
    // quote character (those are only appended in raw output mode) and
    // updates the escape/quote state.
    //
    // Deliberately kept as a plain if cascade: a side-effect free variant
    // computing shouldAppend/newEscaped/newQuoted up front was more than
    // 100% slower in micro benchmarks.
    private void append(final StringBuilder buffer, final char c) {
        if (c == escapeChar) {
            if (escaped || rawOutput) {
                buffer.append(c);
            }
            escaped = !escaped;
        } else if (c == quoteChar) {
            if (escaped) {
                buffer.append(c);
                escaped = false;
            } else {
                quoted = !quoted;
                if (rawOutput) {
                    buffer.append(c);
                }
            }
        } else {
            buffer.append(c);
            escaped = false;
        }
    }

    // Same state transitions as append, but without producing any output.
    private void advance(final char c) {
        if (c == escapeChar) {
            escaped = !escaped;
        } else if (c == quoteChar) {
            if (escaped) {
                escaped = false;
            } else {
                quoted = !quoted;
            }
        } else {
            escaped = false;
        }
    }

    private void resetState() {
        escaped = false;
        quoted = false;
    }

    private boolean isEscaped() {
        return escaped || quoted;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy