// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// net.intelie.pipes.util.Escapes Maven / Gradle / Ivy
//
// There is a newer version: 0.25.5
// Show newest version
package net.intelie.pipes.util;

import net.intelie.pipes.Function;
import net.intelie.pipes.ast.*;
import net.intelie.pipes.filters.Segment;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Static helpers that escape and unescape strings, identifiers and wildcard patterns.
 *
 * <p>Escaping conventions implemented here (see {@link #escapeInternal(String, char...)} and
 * {@link #unescapeNext(CharIterator, String)}):
 * <ul>
 *   <li>newline, tab and carriage return are written as a backslash followed by
 *       {@code n}, {@code t} or {@code r};</li>
 *   <li>other characters below 32 and characters in the range 128..255 are written as a backslash,
 *       {@code x} and two hexadecimal digits;</li>
 *   <li>characters at or above 256 are written as a backslash, {@code u} and four hexadecimal
 *       digits;</li>
 *   <li>any other character may be protected by a single preceding backslash.</li>
 * </ul>
 *
 * <p>The class is abstract and only exposes static members.
 */
public abstract class Escapes {
    /**
     * Matches runs of combining diacritical marks, modifier letters (category Lm) and modifier
     * symbols (category Sk). Used by {@link #safeIdentifier(String)} after NFD normalization.
     */
    public static final Pattern DIACRITICS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
    /** Characters backslash-escaped by {@link #formatString(String)}: single quote and backslash. */
    public static final char[] BACKSLASH = new char[]{'\'', '\\'};
    /** Characters backslash-escaped by {@link #formatIdentifier(String)}: the two curly braces. */
    public static final char[] BRACES = new char[]{'{', '}'};
    /**
     * Characters that force an identifier to be wrapped in braces and that are backslash-escaped
     * by {@link #formatUnquotedString(String)}.
     */
    private static final char[] ID_FORBIDDEN = {' ', '\n', '\t', '\r', '\u3000', '#', '$', ',', '+', '-', '!', '(', ')', ':', '^',
            '[', ']', '"', '\'', '{', '}', '~', '*', '?', '\\', '/', '%', '>', '<', '=', '@', '&', '|'};
    /** Runs of characters outside {@code \w}; replaced by an underscore in {@link #safeIdentifier(String)}. */
    private static final Pattern FLAT = Pattern.compile("[^\\w]+");
    /** Runs of underscores; collapsed to a single underscore in {@link #safeIdentifier(String)}. */
    private static final Pattern DUPLICATED = Pattern.compile("[_]+");

    static {
        // Arrays.binarySearch is used on these arrays and requires them to be sorted.
        Arrays.sort(ID_FORBIDDEN);
        Arrays.sort(BACKSLASH);
        Arrays.sort(BRACES);
    }

    /**
     * Consumes characters from {@code it} and returns them with escape sequences decoded.
     *
     * <p>Consumption ends at the end of the input or right before the first character contained in
     * {@code stop} that is not preceded by a backslash; that stop character is left unconsumed.
     *
     * @param it   iterator positioned before the first character to read
     * @param stop characters that terminate the term when not escaped; empty to read to the end
     * @return the decoded text (possibly empty)
     * @throws IllegalArgumentException if a hexadecimal escape is cut short or contains a
     *                                  character that is not a hexadecimal digit, or if the term
     *                                  ends right after a backslash
     */
    private static String unescapeNext(CharIterator it, String stop) {
        StringBuilder builder = new StringBuilder();
        boolean escaping = false;
        // Place value of the next expected hex digit; 0 means "not inside a hex escape".
        int multiplier = 0;
        int code = 0;

        // While a backslash is pending, the next character is taken even if it is a stop character.
        while (it.moveNext(escaping ? "" : stop)) {
            char c = it.current();
            if (multiplier > 0) {
                // Character.digit returns -1 for non-hex characters; without this check such input
                // would be folded into the code point and silently decode to a wrong character.
                int digit = Character.digit(c, 16);
                if (digit < 0)
                    throw new IllegalArgumentException("Invalid hexadecimal digit '" + c + "' in escape sequence.");
                code += digit * multiplier;
                multiplier /= 16;
                if (multiplier == 0) {
                    builder.appendCodePoint(code);
                    code = 0;
                }
            } else if (escaping) {
                if (c == 'u') {
                    // found an escaped unicode character (four hex digits follow)
                    multiplier = 16 * 16 * 16;
                } else if (c == 'x') {
                    // found an 8 bit escaped unicode character (two hex digits follow)
                    multiplier = 16;
                } else if (c == 'n') {
                    builder.append('\n');
                } else if (c == 't') {
                    builder.append('\t');
                } else if (c == 'r') {
                    builder.append('\r');
                } else {
                    // any other escaped character stands for itself
                    builder.append(c);
                }
                escaping = false;
            } else if (c == '\\') {
                escaping = true;
            } else {
                builder.append(c);
            }
        }

        if (multiplier > 0)
            throw new IllegalArgumentException("Truncated unicode escape sequence.");

        if (escaping)
            throw new IllegalArgumentException("Term can not end with escape character.");

        return builder.toString();
    }

    /**
     * Parses a wildcard expression into segments: each unescaped {@code *} becomes a
     * {@link Segment.Star}, each unescaped {@code ?} a {@link Segment.Question}, and every other
     * non-empty stretch of text a {@link Segment.Literal} holding the unescaped text.
     *
     * @param input the wildcard expression, may be {@code null}
     * @return the segments in input order, or {@code null} when {@code input} is {@code null}
     * @throws IllegalArgumentException if {@code input} contains a malformed escape sequence
     */
    public static Segment[] unescapeWildcard(String input) {
        if (input == null) return null;
        List<?> nodes = unescapeWildcard(
                new SourceLocation(
                        new SourceLocation(input, 0, 1, 1).withType(SourceLocation.Type.NONE),
                        new SourceLocation(input, input.length(), 1, Math.max(1, input.length()))),
                input);

        // A plain loop is used instead of a stream: streaming a raw List erases toArray's result
        // to Object[], which cannot be returned as Segment[].
        Segment[] segments = new Segment[nodes.size()];
        int index = 0;
        for (Object node : nodes) {
            CallNode call = (CallNode) node;
            switch (call.getName()) {
                case Function.FT_SEG_STAR:
                    segments[index++] = new Segment.Star();
                    break;
                case Function.FT_SEG_QUESTION:
                    segments[index++] = new Segment.Question();
                    break;
                default: //it must be FT_SEG_LITERAL
                    segments[index++] = new Segment.Literal(((LiteralNode) call.getArgs().get(0)).getValue());
                    break;
            }
        }
        return segments;
    }

    /**
     * Parses a wildcard expression into a list of {@link CallNode}s named
     * {@code Function.FT_SEG_LITERAL}, {@code Function.FT_SEG_STAR} or
     * {@code Function.FT_SEG_QUESTION}, each carrying the sub-location of the text it came from.
     *
     * <p>NOTE(review): the return type is a raw {@code List} in this source and is kept that way so
     * existing callers are unaffected; every element added here is a {@link CallNode}.
     *
     * @param location location of {@code input} in the original source; must span a single line and
     *                 have the same length as {@code input}
     * @param input    the wildcard expression, may be {@code null}
     * @return the parsed nodes in input order, or {@code null} when {@code input} is {@code null}
     * @throws IllegalArgumentException if {@code input} contains a malformed escape sequence
     */
    public static List unescapeWildcard(SourceLocation location, String input) {
        if (input == null) return null;

        Preconditions.checkArgument(location.getLength() == input.length(), "invalid length");
        Preconditions.checkArgument(location.getBeginLine() == location.getEndLine(), "more than one line");
        Preconditions.checkArgument(location.getEndColumn() - location.getBeginColumn() + (input.length() > 0 ? 1 : 0) == input.length(), "invalid length");

        List<CallNode> result = new ArrayList<>();
        CharIterator it = new CharIterator(input);

        // Alternate between a (possibly empty) literal run and the single wildcard that ended it.
        while (it.hasNext("")) {
            unescapeLiteral(location, result, it);
            unescapeStar(location, result, it);
        }

        return result;
    }

    /**
     * Consumes one character, if any is left, and appends the matching wildcard node:
     * {@code FT_SEG_QUESTION} when the character is {@code '?'}, {@code FT_SEG_STAR} otherwise.
     * It is called right after {@link #unescapeLiteral}, which stops only at the end of the input
     * or before an unescaped {@code '?'} or {@code '*'}.
     */
    private static void unescapeStar(SourceLocation location, List<CallNode> result, CharIterator it) {
        int start = it.nextIndex();
        if (it.moveNext("")) {
            SourceLocation thisLoc = location.subLocation(SourceLocation.Type.NONE, start, it.nextIndex());

            result.add(new CallNode(thisLoc,
                    it.current() == '?'
                            ? Function.FT_SEG_QUESTION
                            : Function.FT_SEG_STAR));
        }
    }

    /**
     * Consumes text up to the next unescaped {@code '?'} or {@code '*'} and, when the unescaped
     * text is not empty, appends an {@code FT_SEG_LITERAL} node wrapping it as a string literal.
     */
    private static void unescapeLiteral(SourceLocation location, List<CallNode> result, CharIterator it) {
        int start = it.nextIndex();
        String next = unescapeNext(it, "?*");
        if (next.length() > 0) {
            SourceLocation thisLoc = location.subLocation(SourceLocation.Type.NONE, start, it.nextIndex());
            result.add(new CallNode(thisLoc, Function.FT_SEG_LITERAL, new LiteralNode(thisLoc, net.intelie.pipes.types.Type.STRING, next)));
        }
    }

    /**
     * Decodes every escape sequence in {@code input}.
     *
     * @param input the escaped text
     * @return the decoded text
     * @throws IllegalArgumentException if a hexadecimal escape is cut short or contains a
     *                                  non-hexadecimal character, or if the input ends with a
     *                                  lone backslash
     */
    public static String unescape(String input) {
        return unescapeNext(new CharIterator(input), "");
    }

    /**
     * Forward-only cursor over a string that can refuse to advance onto a set of stop characters.
     */
    private static class CharIterator {
        private final String s;
        // Index of the current character; -1 until the first successful moveNext.
        private int i;

        public CharIterator(String s) {
            this.s = s;
            this.i = -1;
        }

        /** Returns the index of the character that the next {@link #moveNext(String)} would select. */
        public int nextIndex() {
            return i + 1;
        }

        /**
         * Advances to the next character unless the input is exhausted or that character is
         * contained in {@code stop}.
         *
         * @return {@code true} if the cursor advanced
         */
        public boolean moveNext(String stop) {
            if (!hasNext(stop)) return false;
            i++;
            return true;
        }

        /** Tells whether a next character exists and is not contained in {@code stop}. */
        private boolean hasNext(String stop) {
            return i + 1 < s.length() && stop.indexOf(s.charAt(i + 1)) < 0;
        }

        /** Returns the character selected by the last successful {@link #moveNext(String)}. */
        public char current() {
            return s.charAt(i);
        }
    }

    /**
     * Renders {@code s} as a single-quoted string literal, backslash-escaping single quotes and
     * backslashes in addition to the control and non-ASCII escapes.
     */
    public static String formatString(String s) {
        return "'" + escapeInternal(s, BACKSLASH) + "'";
    }

    /**
     * Renders {@code s} as an identifier. The result is wrapped in curly braces when escaping
     * changed its length or when {@link #needsIdentifierFormatting(String)} reports that it cannot
     * be written bare; otherwise {@code s} is returned unchanged.
     */
    public static String formatIdentifier(String s) {
        String s2 = escapeInternal(s, BRACES);
        // Escaping only ever adds characters, so a length difference means something was escaped.
        if (s2.length() != s.length() || needsIdentifierFormatting(s))
            return "{" + s2 + "}";
        else
            return s;
    }

    /**
     * Escapes {@code s} without surrounding quotes by backslash-escaping every character from the
     * forbidden-identifier set, in addition to the control and non-ASCII escapes.
     */
    public static String formatUnquotedString(String s) {
        return escapeInternal(s, ID_FORBIDDEN);
    }

    /**
     * Derives an identifier made only of {@code \w} characters from arbitrary text.
     *
     * <p>The text is NFD-normalized, stripped of diacritics, has every run of non-word characters
     * replaced by one underscore, and is trimmed of leading and trailing underscores. An empty
     * result becomes {@code "__"}; a result starting with a digit is prefixed with {@code "_"}.
     */
    public static String safeIdentifier(String s) {
        s = Normalizer.normalize(s, Normalizer.Form.NFD);
        s = DIACRITICS.matcher(s).replaceAll("");
        s = FLAT.matcher(s).replaceAll("_");
        // An underscore produced above may sit next to an underscore already present in the text.
        s = DUPLICATED.matcher(s).replaceAll("_");
        s = trimUnderscore(s);

        if (s.length() == 0) return "__";
        if (Character.isDigit(s.charAt(0))) return "_" + s;
        return s;
    }

    /** Removes all leading and trailing underscores; returns {@code ""} if nothing else remains. */
    private static String trimUnderscore(String s) {
        int i = 0;
        while (i < s.length() && s.charAt(i) == '_')
            ++i;

        int j = s.length();
        while (j > 0 && s.charAt(j - 1) == '_')
            j--;

        // Both scans cross each other when the string consists solely of underscores.
        if (i > j)
            return "";
        return s.substring(i, j);
    }

    /**
     * Tells whether {@code s} cannot be written as a bare identifier: it starts with a digit or
     * contains any character from the forbidden-identifier set (which includes {@code '@'}).
     * An empty string yields {@code false}.
     */
    public static boolean needsIdentifierFormatting(String s) {
        if (s.length() > 0 && Character.isDigit(s.charAt(0))) return true;
        for (int i = 0; i < s.length(); i++) {
            if (Arrays.binarySearch(ID_FORBIDDEN, s.charAt(i)) >= 0) return true;
        }
        return false;
    }

    /**
     * Escapes control characters and characters at or above 128 in {@code s}.
     *
     * <p>No character is backslash-prefixed by this method, so a literal backslash in {@code s}
     * is copied as is and the result is not guaranteed to round-trip through
     * {@link #unescape(String)}.
     */
    public static String escape(String s) {
        return escapeInternal(s);
    }

    /**
     * Shared escaping routine.
     *
     * @param s           text to escape
     * @param escapeChars sorted array of characters that must be preceded by a backslash
     * @return the escaped text
     */
    private static String escapeInternal(String s, char... escapeChars) {
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);

            if (c == '\n') {
                sb.append("\\n");
            } else if (c == '\t') {
                sb.append("\\t");
            } else if (c == '\r') {
                sb.append("\\r");
            } else if (c >= 256) {
                // does not fit in two hex digits: use the four-digit form
                sb.append("\\u").append(hex(c, 4));
            } else if (c <= 31 || c >= 128) {
                // remaining control characters and the 128..255 range: two-digit form
                sb.append("\\x").append(hex(c, 2));
            } else {
                if (Arrays.binarySearch(escapeChars, c) >= 0)
                    sb.append('\\');
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /** Formats {@code c} as lowercase hexadecimal, left-padded with zeros to at least {@code size} digits. */
    private static String hex(char c, int size) {
        StringBuilder sb = new StringBuilder();
        String s = Integer.toString(c, 16);
        for (int i = s.length(); i < size; i++)
            sb.append('0');
        sb.append(s);
        return sb.toString();
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy