All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.cryptomator.cryptofs.GlobToRegex Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2013, 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.cryptomator.cryptofs;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Objects;
import java.util.regex.PatternSyntaxException;

import com.google.common.base.CharMatcher;

/**
 * Translates globs to regex patterns.
 *
 * @author Colin Decker
 */
class GlobToRegex {

	/**
	 * Converts the given glob to a regular expression pattern. The given separator determines what
	 * character the resulting expression breaks on for glob expressions such as {@code *}, which
	 * should not cross directory boundaries.
	 *
	 * <p>Basic conversions (assuming {@code /} as separator):
	 *
	 * <pre>{@code
	 * ?        = [^/]
	 * *        = [^/]*
	 * **       = .*
	 * [a-z]    = [[^/]&&[a-z]]
	 * [!a-z]   = [[^/]&&[^a-z]]
	 * {a,b,c}  = (a|b|c)
	 * }</pre>
	 *
	 * @param glob the glob expression to translate
	 * @param separator the path separator character
	 * @return the regular expression equivalent to {@code glob}
	 * @throws NullPointerException if {@code glob} is null
	 * @throws PatternSyntaxException if the glob is malformed (hanging escape, empty or unclosed
	 *     {@code []}, unclosed or nested {@code {}})
	 */
	public static String toRegex(String glob, char separator) {
		return new GlobToRegex(glob, separator).convert();
	}

	/**
	 * Characters that carry a meaning in a regex outside of a character class and therefore must be
	 * escaped when they appear as literal characters in the glob. Includes {@code |}, which would
	 * otherwise turn a literal glob such as {@code a|b} into an alternation.
	 */
	private static final String REGEX_RESERVED = "^$.?+*\\[]{}()|";

	private final String glob;
	private final char separator;
	private final StringBuilder builder = new StringBuilder();
	private final Deque<State> states = new ArrayDeque<>();
	private int index;

	private GlobToRegex(String glob, char separator) {
		this.glob = Objects.requireNonNull(glob, "glob");
		this.separator = separator;
	}

	/**
	 * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
	 * with the state at the top of the stack being the current state at any given time. The current
	 * state is always used to process the next character. When a state processes a character, it may
	 * pop the current state or push a new state as the current state. The resulting regex is written
	 * to {@code builder}.
	 */
	private String convert() {
		pushState(NORMAL);
		for (index = 0; index < glob.length(); index++) {
			currentState().process(this, glob.charAt(index));
		}
		currentState().finish(this);
		return builder.toString();
	}

	/**
	 * Enters the given state. The current state becomes the previous state.
	 */
	private void pushState(State state) {
		states.push(state);
	}

	/**
	 * Returns to the previous state.
	 */
	private void popState() {
		states.pop();
	}

	/**
	 * Returns the current state.
	 */
	private State currentState() {
		return states.peek();
	}

	/**
	 * Creates a {@link PatternSyntaxException} describing an error at the current index of the
	 * glob. Callers are expected to {@code throw} the returned exception.
	 */
	private PatternSyntaxException syntaxError(String desc) {
		return new PatternSyntaxException(desc, glob, index);
	}

	/**
	 * Appends the given character as-is to the regex.
	 */
	private void appendExact(char c) {
		builder.append(c);
	}

	/**
	 * Appends the regex form of the given normal character or separator from the glob.
	 */
	private void append(char c) {
		if (c == separator) {
			appendSeparator();
		} else {
			appendNormal(c);
		}
	}

	/**
	 * Appends the regex form of the given normal character from the glob.
	 */
	private void appendNormal(char c) {
		if (REGEX_RESERVED.indexOf(c) >= 0) {
			builder.append('\\');
		}
		builder.append(c);
	}

	/**
	 * Appends the regex form matching the separator for the path type.
	 */
	private void appendSeparator() {
		appendNormal(separator);
	}

	/**
	 * Appends the regex form that matches anything except the separator for the path type.
	 */
	private void appendNonSeparator() {
		builder.append("[^");
		appendInBracket(separator);
		builder.append(']');
	}

	/**
	 * Appends the regex form of the glob ? character.
	 */
	private void appendQuestionMark() {
		appendNonSeparator();
	}

	/**
	 * Appends the regex form of the glob * character.
	 */
	private void appendStar() {
		appendNonSeparator();
		builder.append('*');
	}

	/**
	 * Appends the regex form of the glob ** pattern.
	 */
	private void appendStarStar() {
		builder.append(".*");
	}

	/**
	 * Appends the regex form of the start of a glob [] section.
	 */
	private void appendBracketStart() {
		builder.append('[');
		appendNonSeparator();
		builder.append("&&[");
	}

	/**
	 * Appends the regex form of the end of a glob [] section.
	 */
	private void appendBracketEnd() {
		builder.append("]]");
	}

	/**
	 * Appends the regex form of the given character within a glob [] section.
	 */
	private void appendInBracket(char c) {
		// Inside a regex character class, '\' starts an escape, '[' opens a nested class and "&&"
		// is the intersection operator. In a glob bracket expression these characters simply match
		// themselves, so they must be escaped.
		if (c == '\\' || c == '[' || c == '&') {
			builder.append('\\');
		}
		builder.append(c);
	}

	/**
	 * Appends the regex form of the start of a glob {} section.
	 */
	private void appendCurlyBraceStart() {
		builder.append('(');
	}

	/**
	 * Appends the regex form of the separator (,) within a glob {} section.
	 */
	private void appendSubpatternSeparator() {
		builder.append('|');
	}

	/**
	 * Appends the regex form of the end of a glob {} section.
	 */
	private void appendCurlyBraceEnd() {
		builder.append(')');
	}

	/**
	 * Converter state.
	 */
	private abstract static class State {

		/**
		 * Process the next character with the current state, transitioning the converter to a new
		 * state if necessary.
		 */
		abstract void process(GlobToRegex converter, char c);

		/**
		 * Called after all characters have been read.
		 */
		void finish(GlobToRegex converter) {
		}
	}

	/**
	 * Normal state.
	 */
	private static final State NORMAL = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			switch (c) {
				case '?':
					converter.appendQuestionMark();
					return;
				case '[':
					converter.appendBracketStart();
					converter.pushState(BRACKET_FIRST_CHAR);
					return;
				case '{':
					converter.appendCurlyBraceStart();
					converter.pushState(CURLY_BRACE);
					return;
				case '*':
					converter.pushState(STAR);
					return;
				case '\\':
					converter.pushState(ESCAPE);
					return;
				default:
					converter.append(c);
			}
		}
	};

	/**
	 * State following the reading of a single \.
	 */
	private static final State ESCAPE = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			converter.append(c);
			converter.popState();
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Hanging escape (\\) at end of pattern");
		}
	};

	/**
	 * State following the reading of a single *.
	 */
	private static final State STAR = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == '*') {
				converter.appendStarStar();
				converter.popState();
			} else {
				converter.appendStar();
				converter.popState();
				converter.currentState().process(converter, c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			converter.appendStar();
			// The star may have been read inside an unfinished {} group; hand over to the
			// enclosing state so that it can report e.g. "Unclosed {".
			converter.popState();
			converter.currentState().finish(converter);
		}
	};

	/**
	 * State immediately following the reading of a [.
	 */
	private static final State BRACKET_FIRST_CHAR = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == ']') {
				// A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for
				// example) but doesn't work for the default java.nio.file implementations. In the
				// case of "[]]" it produces:
				//   java.util.regex.PatternSyntaxException: Unclosed character class near index 13
				//   ^[[^/]&&[]]\]$
				//                ^
				// The error here is slightly different, but trying to make this work would require
				// some kind of lookahead and break the simplicity of char-by-char conversion here.
				throw converter.syntaxError("Empty []");
			}
			if (c == '!') {
				// glob negation (!) becomes regex negation (^)
				converter.appendExact('^');
			} else if (c == '-') {
				converter.appendExact(c);
			} else {
				converter.appendInBracket(c);
			}
			converter.popState();
			converter.pushState(BRACKET);
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed [");
		}
	};

	/**
	 * State inside [brackets], but not at the first character inside the brackets.
	 */
	private static final State BRACKET = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == ']') {
				converter.appendBracketEnd();
				converter.popState();
			} else {
				converter.appendInBracket(c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed [");
		}
	};

	/**
	 * State inside {curly braces}.
	 */
	private static final State CURLY_BRACE = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			switch (c) {
				case '?':
					converter.appendQuestionMark();
					break;
				case '[':
					converter.appendBracketStart();
					converter.pushState(BRACKET_FIRST_CHAR);
					break;
				case '{':
					throw converter.syntaxError("{ not allowed in subpattern group");
				case '*':
					converter.pushState(STAR);
					break;
				case '\\':
					converter.pushState(ESCAPE);
					break;
				case '}':
					converter.appendCurlyBraceEnd();
					converter.popState();
					break;
				case ',':
					converter.appendSubpatternSeparator();
					break;
				default:
					converter.append(c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed {");
		}
	};
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy