net.intelie.pipes.util.Escapes Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pipes-api Show documentation
Show all versions of pipes-api Show documentation
Intelie Pipes' API classes and interfaces
package net.intelie.pipes.util;
import net.intelie.pipes.Function;
import net.intelie.pipes.ast.*;
import net.intelie.pipes.filters.Segment;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public abstract class Escapes {
// One or more characters from the Combining Diacritical Marks block, or of Unicode
// category Lm (modifier letter) or Sk (modifier symbol). Removed by safeIdentifier
// after NFD normalization.
public static final Pattern DIACRITICS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
// Characters that formatString prefixes with a backslash: single quote and backslash.
public static final char[] BACKSLASH = new char[]{'\'', '\\'};
// Characters that formatIdentifier prefixes with a backslash: the two curly braces.
public static final char[] BRACES = new char[]{'{', '}'};
// Characters that force an identifier to be brace-quoted (needsIdentifierFormatting)
// and that formatUnquotedString prefixes with a backslash.
private static final char[] ID_FORBIDDEN = {' ', '\n', '\t', '\r', '\u3000', '#', '$', ',', '+', '-', '!', '(', ')', ':', '^',
'[', ']', '"', '\'', '{', '}', '~', '*', '?', '\\', '/', '%', '>', '<', '=', '@', '&', '|'};
// One or more characters outside the regex word class; safeIdentifier replaces each run with "_".
private static final Pattern FLAT = Pattern.compile("[^\\w]+");
// One or more consecutive underscores; safeIdentifier collapses each run to a single "_".
private static final Pattern DUPLICATED = Pattern.compile("[_]+");
static {
// These arrays are looked up with Arrays.binarySearch elsewhere in this class,
// which requires its input to be sorted.
Arrays.sort(ID_FORBIDDEN);
Arrays.sort(BACKSLASH);
Arrays.sort(BRACES);
}
/**
 * Consumes characters from {@code it} and decodes backslash escapes, stopping at the end
 * of the input or right before the first unescaped character contained in {@code stop}
 * (that character is left unconsumed).
 *
 * Recognized escapes after a backslash: 'u' followed by four hexadecimal digits,
 * 'x' followed by two hexadecimal digits, 'n' (line feed), 't' (tab), 'r' (carriage
 * return). Any other escaped character is appended as-is.
 *
 * @param it   iterator positioned before the first character to decode
 * @param stop characters that terminate the scan when they appear unescaped; empty for none
 * @return the decoded text (possibly empty)
 * @throws IllegalArgumentException if a hexadecimal escape is truncated or contains a
 *                                  non-hexadecimal character, or if the text ends with a
 *                                  lone backslash
 */
private static String unescapeNext(CharIterator it, String stop) {
    StringBuilder builder = new StringBuilder();
    boolean escaping = false;
    int multiplier = 0; // place value of the next expected hex digit; 0 when not inside a hex escape
    int code = 0;
    // Right after a backslash the stop set is ignored, so an escaped stop character is taken literally.
    while (it.moveNext(escaping ? "" : stop)) {
        char c = it.current();
        if (multiplier > 0) {
            int digit = Character.digit(c, 16);
            // Character.digit yields -1 for non-hex input; adding that to the code would
            // silently produce a wrong character instead of reporting the malformed escape.
            if (digit < 0)
                throw new IllegalArgumentException("Invalid hexadecimal digit '" + c + "' in escape sequence.");
            code += digit * multiplier;
            multiplier /= 16;
            if (multiplier == 0) {
                builder.appendCodePoint(code);
                code = 0;
            }
        } else if (escaping) {
            if (c == 'u') {
                // found an escaped unicode character (four hex digits follow)
                multiplier = 16 * 16 * 16;
            } else if (c == 'x') {
                // found an 8 bit escaped character (two hex digits follow)
                multiplier = 16;
            } else if (c == 'n') {
                builder.append('\n');
            } else if (c == 't') {
                builder.append('\t');
            } else if (c == 'r') {
                builder.append('\r');
            } else {
                builder.append(c);
            }
            escaping = false;
        } else if (c == '\\') {
            escaping = true;
        } else {
            builder.append(c);
        }
    }
    if (multiplier > 0)
        throw new IllegalArgumentException("Truncated unicode escape sequence.");
    if (escaping)
        throw new IllegalArgumentException("Term can not end with escape character.");
    return builder.toString();
}
/**
 * Parses a wildcard expression into an array of segments.
 *
 * Delegates to {@link #unescapeWildcard(SourceLocation, String)} with a location built
 * for the whole input, then converts each resulting call node into the matching
 * {@link Segment} implementation.
 *
 * @param input the wildcard expression, may be {@code null}
 * @return one segment per parsed node, or {@code null} when {@code input} is {@code null}
 */
public static Segment[] unescapeWildcard(String input) {
    if (input == null) return null;

    int length = input.length();
    List nodes = unescapeWildcard(
            new SourceLocation(
                    new SourceLocation(input, 0, 1, 1).withType(SourceLocation.Type.NONE),
                    new SourceLocation(input, length, 1, Math.max(1, length))),
            input);

    Segment[] segments = new Segment[nodes.size()];
    int index = 0;
    for (Object node : nodes) {
        CallNode call = (CallNode) node;
        Segment segment;
        switch (call.getName()) {
            case Function.FT_SEG_STAR:
                segment = new Segment.Star();
                break;
            case Function.FT_SEG_QUESTION:
                segment = new Segment.Question();
                break;
            default: // anything else must be FT_SEG_LITERAL
                segment = new Segment.Literal(((LiteralNode) call.getArgs().get(0)).getValue());
                break;
        }
        segments[index++] = segment;
    }
    return segments;
}
/**
 * Parses a wildcard expression into a list of call nodes, one per literal run or
 * wildcard character, each carrying a sub-location of {@code location}.
 *
 * @param location source location of {@code input}; it must span a single line and its
 *                 length and column range must be consistent with {@code input.length()}
 * @param input    the wildcard expression, may be {@code null}
 * @return the parsed nodes in input order, or {@code null} when {@code input} is {@code null}
 */
public static List unescapeWildcard(SourceLocation location, String input) {
    if (input == null) return null;

    int length = input.length();
    // Columns are treated as inclusive on both ends, hence the +1 for non-empty input.
    int inclusiveEnd = length > 0 ? 1 : 0;
    Preconditions.checkArgument(location.getLength() == length, "invalid length");
    Preconditions.checkArgument(location.getBeginLine() == location.getEndLine(), "more than one line");
    Preconditions.checkArgument(location.getEndColumn() - location.getBeginColumn() + inclusiveEnd == length, "invalid length");

    List segments = new ArrayList<>();
    // Alternate between a (possibly empty) literal run and the single wildcard that ended it.
    for (CharIterator chars = new CharIterator(input); chars.hasNext(""); ) {
        unescapeLiteral(location, segments, chars);
        unescapeStar(location, segments, chars);
    }
    return segments;
}
/**
 * Consumes a single character from {@code it}, if any is left, and appends the
 * corresponding wildcard node to {@code result}: FT_SEG_QUESTION when the character is
 * '?', FT_SEG_STAR for anything else.
 *
 * @param location location of the whole expression; the node gets the matching sub-location
 * @param result   list receiving the new node
 * @param it       iterator positioned right before the wildcard character
 */
private static void unescapeStar(SourceLocation location, List result, CharIterator it) {
    int begin = it.nextIndex();
    if (!it.moveNext("")) return; // input exhausted, nothing to emit

    SourceLocation wildcardLoc = location.subLocation(SourceLocation.Type.NONE, begin, it.nextIndex());
    if (it.current() == '?') {
        result.add(new CallNode(wildcardLoc, Function.FT_SEG_QUESTION));
    } else {
        result.add(new CallNode(wildcardLoc, Function.FT_SEG_STAR));
    }
}
/**
 * Decodes the literal run that precedes the next unescaped '?' or '*' and, when it is
 * not empty, appends an FT_SEG_LITERAL call node holding the decoded text to
 * {@code result}.
 *
 * @param location location of the whole expression; the node gets the matching sub-location
 * @param result   list receiving the new node
 * @param it       iterator positioned right before the literal run
 */
private static void unescapeLiteral(SourceLocation location, List result, CharIterator it) {
    int begin = it.nextIndex();
    String literal = unescapeNext(it, "?*");
    if (literal.length() == 0) return; // two wildcards in a row, or a leading wildcard

    SourceLocation literalLoc = location.subLocation(SourceLocation.Type.NONE, begin, it.nextIndex());
    LiteralNode value = new LiteralNode(literalLoc, net.intelie.pipes.types.Type.STRING, literal);
    result.add(new CallNode(literalLoc, Function.FT_SEG_LITERAL, value));
}
/**
 * Decodes every backslash escape in {@code input}, with no stop characters.
 *
 * @param input the escaped text
 * @return the decoded text
 */
public static String unescape(String input) {
    CharIterator chars = new CharIterator(input);
    return unescapeNext(chars, "");
}
/**
 * Forward-only cursor over the characters of a string that can refuse to advance onto
 * a caller-supplied set of stop characters.
 */
private static class CharIterator {
    private final String text;
    // Index of the character that the next successful moveNext will make current.
    private int next;

    public CharIterator(String s) {
        this.text = s;
        this.next = 0;
    }

    /** Returns the index of the character that would become current on the next advance. */
    public int nextIndex() {
        return next;
    }

    /** Advances by one character unless the input is exhausted or the next character is in {@code stop}. */
    public boolean moveNext(String stop) {
        if (hasNext(stop)) {
            next++;
            return true;
        }
        return false;
    }

    /** Tells whether there is a next character and it is not contained in {@code stop}. */
    private boolean hasNext(String stop) {
        if (next >= text.length()) return false;
        return stop.indexOf(text.charAt(next)) < 0;
    }

    /** Returns the character most recently moved onto. */
    public char current() {
        return text.charAt(next - 1);
    }
}
/**
 * Renders {@code s} as a single-quoted literal, escaping it with the BACKSLASH
 * character set (single quote and backslash).
 *
 * @param s the raw text
 * @return the escaped text wrapped in single quotes
 */
public static String formatString(String s) {
    String escaped = escapeInternal(s, BACKSLASH);
    return new StringBuilder(escaped.length() + 2)
            .append("'")
            .append(escaped)
            .append("'")
            .toString();
}
/**
 * Renders {@code s} as an identifier, wrapping it in curly braces only when required.
 *
 * Braces are added when escaping with the BRACES character set changed the length of
 * the text, or when {@link #needsIdentifierFormatting(String)} reports that the raw
 * text cannot stand on its own.
 *
 * @param s the raw identifier
 * @return {@code s} unchanged, or its escaped form wrapped in curly braces
 */
public static String formatIdentifier(String s) {
    String escaped = escapeInternal(s, BRACES);
    boolean sameLength = escaped.length() == s.length();
    if (sameLength && !needsIdentifierFormatting(s)) {
        return s;
    }
    return "{" + escaped + "}";
}
/**
 * Escapes {@code s} for use without surrounding quotes: every character listed in
 * ID_FORBIDDEN is prefixed with a backslash, on top of the regular escaping.
 *
 * @param s the raw text
 * @return the escaped text
 */
public static String formatUnquotedString(String s) {
    final String escaped = escapeInternal(s, ID_FORBIDDEN);
    return escaped;
}
/**
 * Derives an identifier made only of word characters from arbitrary text.
 *
 * Steps: NFD-normalize, drop everything matched by DIACRITICS, replace each run of
 * non-word characters with an underscore, collapse repeated underscores, and trim
 * underscores from both ends.
 *
 * @param s the raw text
 * @return {@code "__"} when nothing is left, the result prefixed with an underscore
 *         when it starts with a digit, the result itself otherwise
 */
public static String safeIdentifier(String s) {
    String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
    String withoutMarks = DIACRITICS.matcher(decomposed).replaceAll("");
    String flattened = FLAT.matcher(withoutMarks).replaceAll("_");
    String collapsed = DUPLICATED.matcher(flattened).replaceAll("_");
    String trimmed = trimUnderscore(collapsed);

    if (trimmed.length() == 0) {
        return "__";
    }
    return Character.isDigit(trimmed.charAt(0)) ? "_" + trimmed : trimmed;
}
/**
 * Removes every leading and trailing underscore from {@code s}.
 *
 * @param s the text to trim
 * @return the trimmed text; empty when {@code s} consists only of underscores
 */
private static String trimUnderscore(String s) {
    // Trim the tail first so that the head scan can never run past it.
    int end = s.length();
    while (end > 0 && s.charAt(end - 1) == '_') {
        end--;
    }
    int start = 0;
    while (start < end && s.charAt(start) == '_') {
        start++;
    }
    return s.substring(start, end);
}
/**
 * Tells whether {@code s} has to be brace-quoted to be used as an identifier.
 *
 * That is the case when it starts with a digit, contains any character listed in
 * ID_FORBIDDEN, or contains '@' anywhere but at the first position.
 *
 * @param s the raw identifier
 * @return {@code true} when the identifier needs formatting
 */
public static boolean needsIdentifierFormatting(String s) {
    int length = s.length();
    if (length > 0 && Character.isDigit(s.charAt(0))) {
        return true;
    }
    for (int pos = 0; pos < length; pos++) {
        char ch = s.charAt(pos);
        // NOTE(review): '@' is itself listed in ID_FORBIDDEN, so the explicit
        // non-leading '@' test is currently subsumed by the lookup; kept as in the original.
        if (Arrays.binarySearch(ID_FORBIDDEN, ch) >= 0 || (pos != 0 && ch == '@')) {
            return true;
        }
    }
    return false;
}
/**
 * Escapes line feed, tab, carriage return, control characters and every character
 * at or above code 128 in {@code s}. No other character receives a backslash prefix.
 *
 * @param s the raw text
 * @return the escaped text
 */
public static String escape(String s) {
    return escapeInternal(s);
}

/**
 * Shared escaping routine.
 *
 * Line feed, tab and carriage return become their two-character backslash forms; chars
 * with code 256 or above become a backslash, 'u' and four lowercase hex digits; other
 * chars with code up to 31 or from 128 become a backslash, 'x' and two lowercase hex
 * digits. Any remaining char is copied, preceded by a backslash when it is found in
 * {@code escapeChars}.
 *
 * @param s           the raw text
 * @param escapeChars sorted array of extra characters to prefix with a backslash
 *                    (looked up with binary search)
 * @return the escaped text
 */
private static String escapeInternal(String s, char... escapeChars) {
    StringBuilder out = new StringBuilder();
    for (char c : s.toCharArray()) {
        switch (c) {
            case '\n':
                out.append("\\n");
                continue;
            case '\t':
                out.append("\\t");
                continue;
            case '\r':
                out.append("\\r");
                continue;
            default:
                break;
        }
        if (c >= 256) {
            out.append("\\u").append(hex(c, 4));
        } else if (c <= 31 || c >= 128) {
            out.append("\\x").append(hex(c, 2));
        } else {
            boolean mustPrefix = Arrays.binarySearch(escapeChars, c) >= 0;
            if (mustPrefix) {
                out.append('\\');
            }
            out.append(c);
        }
    }
    return out.toString();
}

/**
 * Formats the code of {@code c} as lowercase hexadecimal, left-padded with zeros to at
 * least {@code size} digits.
 */
private static String hex(char c, int size) {
    String digits = Integer.toString(c, 16);
    StringBuilder padded = new StringBuilder();
    for (int missing = size - digits.length(); missing > 0; missing--) {
        padded.append('0');
    }
    return padded.append(digits).toString();
}
}