org.cryptomator.cryptofs.GlobToRegex Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cryptofs Show documentation
This library provides the Java filesystem provider used by Cryptomator.
The newest version!
/*
 * Copyright 2013, 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.cryptomator.cryptofs;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Objects;
import java.util.regex.PatternSyntaxException;

import com.google.common.base.CharMatcher;

/**
 * Translates globs to regex patterns.
 *
 * @author Colin Decker
 */
class GlobToRegex {

	/**
	 * Converts the given glob to a regular expression pattern. The given separator determines what
	 * character the resulting expression breaks on for glob expressions such as * which should not
	 * cross directory boundaries.
	 *
	 * <p>Basic conversions (assuming / as separator):
	 *
	 * <pre>{@code
	 * ?        = [^/]
	 * *        = [^/]*
	 * **       = .*
	 * [a-z]    = [[^/]&&[a-z]]
	 * [!a-z]   = [[^/]&&[^a-z]]
	 * {a,b,c}  = (a|b|c)
	 * }</pre>
	 *
	 * @param glob the glob expression to translate
	 * @param separator the name separator of the path type
	 * @return the regex source equivalent to {@code glob}
	 * @throws NullPointerException if {@code glob} is null
	 * @throws PatternSyntaxException if the glob has an unclosed or empty {@code []}, an unclosed or
	 *     nested <code>{}</code>, or ends with a hanging escape character
	 */
	public static String toRegex(String glob, char separator) {
		return new GlobToRegex(glob, separator).convert();
	}

	/**
	 * Characters that must be escaped when they appear as literals outside of a character class.
	 * Includes | so that a literal pipe in the glob does not become a regex alternation.
	 */
	private static final String REGEX_RESERVED = "^$.?+*\\[]{}()|";

	private final String glob;
	private final char separator;

	private final StringBuilder builder = new StringBuilder();
	private final Deque<State> states = new ArrayDeque<>();
	private int index;

	private GlobToRegex(String glob, char separator) {
		this.glob = Objects.requireNonNull(glob);
		this.separator = separator;
	}

	/**
	 * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
	 * with the state at the top of the stack being the current state at any given time. The current
	 * state is always used to process the next character. When a state processes a character, it may
	 * pop the current state or push a new state as the current state. The resulting regex is written
	 * to {@code builder}.
	 */
	private String convert() {
		pushState(NORMAL);
		for (index = 0; index < glob.length(); index++) {
			currentState().process(this, glob.charAt(index));
		}
		currentState().finish(this);
		return builder.toString();
	}

	/**
	 * Enters the given state. The current state becomes the previous state.
	 */
	private void pushState(State state) {
		states.push(state);
	}

	/**
	 * Returns to the previous state.
	 */
	private void popState() {
		states.pop();
	}

	/**
	 * Returns the current state.
	 */
	private State currentState() {
		return states.peek();
	}

	/**
	 * Creates a {@link PatternSyntaxException} for the glob at the current index. The exception is
	 * returned rather than thrown so that call sites can write {@code throw syntaxError(...)}.
	 */
	private PatternSyntaxException syntaxError(String desc) {
		return new PatternSyntaxException(desc, glob, index);
	}

	/**
	 * Appends the given character as-is to the regex.
	 */
	private void appendExact(char c) {
		builder.append(c);
	}

	/**
	 * Appends the regex form of the given normal character or separator from the glob.
	 */
	private void append(char c) {
		if (c == separator) {
			appendSeparator();
		} else {
			appendNormal(c);
		}
	}

	/**
	 * Appends the regex form of the given normal character from the glob, escaping it if it is
	 * reserved in regex syntax.
	 */
	private void appendNormal(char c) {
		if (REGEX_RESERVED.indexOf(c) >= 0) {
			builder.append('\\');
		}
		builder.append(c);
	}

	/**
	 * Appends the regex form matching the separator for the path type.
	 */
	private void appendSeparator() {
		appendNormal(separator);
	}

	/**
	 * Appends the regex form that matches anything except the separator for the path type.
	 */
	private void appendNonSeparator() {
		builder.append("[^");
		appendInBracket(separator);
		builder.append(']');
	}

	/**
	 * Appends the regex form of the glob ? character.
	 */
	private void appendQuestionMark() {
		appendNonSeparator();
	}

	/**
	 * Appends the regex form of the glob * character.
	 */
	private void appendStar() {
		appendNonSeparator();
		builder.append('*');
	}

	/**
	 * Appends the regex form of the glob ** pattern.
	 */
	private void appendStarStar() {
		builder.append(".*");
	}

	/**
	 * Appends the regex form of the start of a glob [] section. The glob's own class is intersected
	 * with "anything but the separator" so that a bracket expression never matches a separator.
	 */
	private void appendBracketStart() {
		builder.append('[');
		appendNonSeparator();
		builder.append("&&[");
	}

	/**
	 * Appends the regex form of the end of a glob [] section.
	 */
	private void appendBracketEnd() {
		builder.append("]]");
	}

	/**
	 * Appends the regex form of the given character within a glob [] section.
	 */
	private void appendInBracket(char c) {
		// Inside a regex character class, \ starts an escape, [ opens a nested class and && is the
		// intersection operator. None of these have that meaning in a glob bracket expression, so
		// they are escaped to keep them literal.
		if (c == '\\' || c == '[' || c == '&') {
			builder.append('\\');
		}

		builder.append(c);
	}

	/**
	 * Appends the regex form of the start of a glob {} section.
	 */
	private void appendCurlyBraceStart() {
		builder.append('(');
	}

	/**
	 * Appends the regex form of the separator (,) within a glob {} section.
	 */
	private void appendSubpatternSeparator() {
		builder.append('|');
	}

	/**
	 * Appends the regex form of the end of a glob {} section.
	 */
	private void appendCurlyBraceEnd() {
		builder.append(')');
	}

	/**
	 * Converter state.
	 */
	private abstract static class State {
		/**
		 * Process the next character with the current state, transitioning the converter to a new
		 * state if necessary.
		 */
		abstract void process(GlobToRegex converter, char c);

		/**
		 * Called after all characters have been read. Does nothing by default; states in which the
		 * glob must not end override this to report a syntax error.
		 */
		void finish(GlobToRegex converter) {
		}
	}

	/**
	 * Normal state.
	 */
	private static final State NORMAL = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			switch (c) {
			case '?':
				converter.appendQuestionMark();
				return;
			case '[':
				converter.appendBracketStart();
				converter.pushState(BRACKET_FIRST_CHAR);
				return;
			case '{':
				converter.appendCurlyBraceStart();
				converter.pushState(CURLY_BRACE);
				return;
			case '*':
				// Output is deferred until the next character shows whether this is * or **.
				converter.pushState(STAR);
				return;
			case '\\':
				converter.pushState(ESCAPE);
				return;
			default:
				converter.append(c);
			}
		}
	};

	/**
	 * State following the reading of a single \.
	 */
	private static final State ESCAPE = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			converter.append(c);
			converter.popState();
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Hanging escape (\\) at end of pattern");
		}
	};

	/**
	 * State following the reading of a single *.
	 */
	private static final State STAR = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == '*') {
				converter.appendStarStar();
				converter.popState();
			} else {
				// It was a lone *: emit it, then let the enclosing state handle the character that
				// was just read.
				converter.appendStar();
				converter.popState();
				converter.currentState().process(converter, c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			converter.appendStar();
			// The enclosing state must also get the chance to finish, otherwise a glob such as
			// "{a*" would end without the unclosed { being reported.
			converter.popState();
			converter.currentState().finish(converter);
		}
	};

	/**
	 * State immediately following the reading of a [.
	 */
	private static final State BRACKET_FIRST_CHAR = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == ']') {
				// A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example)
				// but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it
				// produces:
				// java.util.regex.PatternSyntaxException: Unclosed character class near index 13
				// ^[[^/]&&[]]\]$
				// ^
				// The error here is slightly different, but trying to make this work would require some
				// kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
				// someone wants to include a ']' inside a character class, they should escape it.
				throw converter.syntaxError("Empty []");
			}
			if (c == '!') {
				// Glob negation maps to regex negation.
				converter.appendExact('^');
			} else if (c == '-') {
				converter.appendExact(c);
			} else {
				// NOTE(review): a leading '^' is passed through unescaped here and therefore also
				// negates the class in the resulting regex; kept as-is since callers may rely on it.
				converter.appendInBracket(c);
			}
			converter.popState();
			converter.pushState(BRACKET);
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed [");
		}
	};

	/**
	 * State inside [brackets], but not at the first character inside the brackets.
	 */
	private static final State BRACKET = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			if (c == ']') {
				converter.appendBracketEnd();
				converter.popState();
			} else {
				converter.appendInBracket(c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed [");
		}
	};

	/**
	 * State inside {curly braces}.
	 */
	private static final State CURLY_BRACE = new State() {
		@Override
		void process(GlobToRegex converter, char c) {
			switch (c) {
			case '?':
				converter.appendQuestionMark();
				break;
			case '[':
				converter.appendBracketStart();
				converter.pushState(BRACKET_FIRST_CHAR);
				break;
			case '{':
				throw converter.syntaxError("{ not allowed in subpattern group");
			case '*':
				converter.pushState(STAR);
				break;
			case '\\':
				converter.pushState(ESCAPE);
				break;
			case '}':
				converter.appendCurlyBraceEnd();
				converter.popState();
				break;
			case ',':
				converter.appendSubpatternSeparator();
				break;
			default:
				converter.append(c);
			}
		}

		@Override
		void finish(GlobToRegex converter) {
			throw converter.syntaxError("Unclosed {");
		}
	};
}