All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.palantir.giraffe.file.base.GlobToRegexParser Maven / Gradle / Ivy

There is a newer version: 0.10.1
Show newest version
/**
 * Copyright 2015 Palantir Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.palantir.giraffe.file.base;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.regex.PatternSyntaxException;

/**
 * Translates a glob pattern into an equivalent regular expression for
 * {@link java.util.regex.Pattern}.
 * <p>
 * Supported glob syntax, as implemented below:
 * <ul>
 * <li>{@code *} matches any run of non-separator characters; {@code **}
 * matches any run of characters, separators included</li>
 * <li>{@code ?} matches exactly one non-separator character</li>
 * <li>{@code [...]} is a bracket expression; a leading {@code !} negates it,
 * and it never matches the separator</li>
 * <li>{@code {a,b}} is a group of alternatives; groups cannot be nested</li>
 * <li>{@code \} escapes the following glob meta character</li>
 * </ul>
 * Instances keep a parsing cursor in fields and perform no synchronization.
 */
final class GlobToRegexParser {

    private static final String REGEX_META_CHARS = "\\.*?+^$()[]{}|";
    private static final String GLOB_META_CHARS = "\\*?[]{},";

    // characters that are special inside a regex character class
    private static final String REGEX_CLASS_META_CHARS = "\\[]^-&";

    private final String glob;
    private final char separator;

    // regex character class matching any single character except the separator
    private final String notSeparator;

    private int index = 0;
    private StringBuilder regex;

    // the finished regex; only assigned once a parse completed without error
    private String result;

    /**
     * Creates a parser for the given glob.
     *
     * @param glob the glob pattern to translate
     * @param separator the path separator; {@code *}, {@code ?} and bracket
     *        expressions never match it
     *
     * @throws NullPointerException if {@code glob} is {@code null}
     */
    GlobToRegexParser(String glob, char separator) {
        this.glob = java.util.Objects.requireNonNull(glob, "glob must be non-null");
        this.separator = separator;
        this.notSeparator = "[^" + escapeForCharacterClass(separator) + "]";
    }

    /**
     * Parses this glob to create an equivalent regular expression that matches
     * the same inputs. The result is anchored with {@code ^} and {@code $} and
     * is cached after the first successful parse.
     *
     * @return the regular expression
     *
     * @throws PatternSyntaxException if the glob is malformed; the exception
     *         is thrown again on every subsequent call
     */
    public String parseToRegex() {
        if (result == null) {
            // start from scratch so that a previously failed attempt cannot
            // leak a half-built expression or a stale cursor into this one
            index = 0;
            regex = new StringBuilder("^");
            parseGlob();
            regex.append('$');
            result = regex.toString();
        }
        return result;
    }

    /**
     * Consumes the whole glob, dispatching on the construct that starts at the
     * current character.
     */
    private void parseGlob() {
        while (index < glob.length()) {
            char c = glob.charAt(index++);
            if (c == '[') {
                parseBracketExpression();
            } else if (c == '{') {
                parseGroup();
            } else {
                parseChar(c);
            }
        }
    }

    /**
     * Translates a single character that is not the start of a bracket
     * expression or group: wildcards, escape sequences and literals.
     *
     * @param c the character that was just consumed
     */
    private void parseChar(char c) {
        if (c == '*') {
            if (peek() == '*') {
                // '**' crosses separators
                regex.append(".*");
                index++;
            } else {
                regex.append(notSeparator).append('*');
            }
        } else if (c == '?') {
            regex.append(notSeparator);
        } else if (c == '\\') {
            if (peek() < 0) {
                throw syntaxError("no escaped character");
            } else if (!isGlobMetaChar(glob.charAt(index))) {
                throw syntaxError("character is not escapable", index);
            } else {
                escapeAndAppendChar(glob.charAt(index));
                index++;
            }
        } else {
            escapeAndAppendChar(c);
        }
    }

    /**
     * Translates a bracket expression; the opening {@code [} has already been
     * consumed. The generated character class is intersected with
     * "not the separator", so it can never match the separator.
     */
    private void parseBracketExpression() {
        regex.append('[').append(notSeparator).append("&&[");

        int first = peek();
        if (first == '!') {
            regex.append('^');
            index++;
            if (peek() == ']') {
                // a negation with nothing to negate is as empty as '[]'
                throw syntaxError("empty bracket expression");
            }
        } else if (first == '^') {
            // '^' only needs escaping at the start of a character class
            regex.append('\\').append('^');
            index++;
        } else if (first == ']') {
            throw syntaxError("empty bracket expression");
        }

        // a '-' at the start of the expression (or right after '!') is a literal
        if (first == '-' || (first == '!' && peek() == '-')) {
            regex.append('-');
            index++;
        }

        char c = 0;
        char last = 0;
        boolean rangeAllowed = false;
        while (index < glob.length()) {
            c = glob.charAt(index++);
            if (c == ']') {
                break;
            } else if (c == separator) {
                throw syntaxError("separator in bracket expression");
            } else if (c == '-') {
                if (!rangeAllowed) {
                    throw syntaxError("invalid range");
                }
                regex.append('-');
                int next = peek();
                if (next < 0) {
                    // ran out of input; c is still '-', so the check after
                    // the loop reports the expression as unterminated
                    break;
                }
                if (next == ']') {
                    // trailing '-' is a literal: consume the terminator so
                    // the expression is recognized as properly closed
                    c = glob.charAt(index++);
                    break;
                }
                if (next < last) {
                    throw syntaxError("invalid range", index - 2);
                }
                rangeAllowed = false;
            } else {
                // these would otherwise start a nested class, an escape or an
                // intersection in the generated regex
                if (c == '[' || c == '\\' || (c == '&' && peek() == '&')) {
                    regex.append('\\');
                }
                regex.append(c);
                last = c;
                rangeAllowed = true;
            }
        }
        if (c != ']') {
            throw syntaxError("unterminated bracked expression");
        }
        regex.append("]]");
    }

    /**
     * Translates a group of alternatives into a non-capturing regex group; the
     * opening <code>{</code> has already been consumed.
     */
    private void parseGroup() {
        regex.append("(?:");
        char c = 0;
        while (index < glob.length()) {
            c = glob.charAt(index++);
            if (c == '}') {
                break;
            } else if (c == '{') {
                throw syntaxError("nested group");
            } else if (c == ',') {
                regex.append('|');
            } else if (c == '[') {
                parseBracketExpression();
            } else {
                parseChar(c);
            }
        }
        if (c != '}') {
            throw syntaxError("unterminated group");
        }
        regex.append(')');
    }

    /**
     * Returns the character code at {@code index} or -1 if there are no
     * characters left in {@code glob}.
     */
    private int peek() {
        return (index < glob.length()) ? glob.charAt(index) : -1;
    }

    /**
     * Appends {@code c} to the regex as a literal, prefixing it with a
     * backslash if it is a regex meta character.
     */
    private void escapeAndAppendChar(char c) {
        if (REGEX_META_CHARS.indexOf(c) >= 0) {
            regex.append('\\');
        }
        regex.append(c);
    }

    private static boolean isGlobMetaChar(char c) {
        return GLOB_META_CHARS.indexOf(c) >= 0;
    }

    /**
     * Returns {@code c} as it must be written inside a regex character class,
     * adding a backslash only when {@code c} is special there. Other
     * characters, such as {@code /}, are returned unchanged.
     */
    private static String escapeForCharacterClass(char c) {
        if (REGEX_CLASS_META_CHARS.indexOf(c) >= 0) {
            return "\\" + c;
        }
        return String.valueOf(c);
    }

    /**
     * Creates a new {@link PatternSyntaxException} with the given description
     * that points at the most recently consumed character. The caller is
     * expected to throw the returned exception.
     */
    private PatternSyntaxException syntaxError(String description) {
        return syntaxError(description, index - 1);
    }

    /**
     * Creates a new {@link PatternSyntaxException} with the given description
     * and given index. The caller is expected to throw the returned exception.
     */
    private PatternSyntaxException syntaxError(String description, int errorIndex) {
        return new PatternSyntaxException(description, glob, errorIndex);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy