All Downloads are FREE. Search and download functionalities are using the official Maven repository.

regexcompiler.MyPattern Maven / Gradle / Ivy

Go to download

A tool to perform static analysis on regexes to determine whether they are vulnerable to ReDoS.

There is a newer version: 1.0.8
Show newest version
package regexcompiler;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import nfa.NFAGraph;
import matcher.*;
import regexcompiler.ParseTree.TreeNode;
import regexcompiler.RegexAnchor.RegexAnchorType;
import regexcompiler.RegexEscapedSymbol.RegexEscapedSymbolType;
import regexcompiler.RegexGroup.RegexGroupType;
import regexcompiler.RegexOperator.OperatorType;
import regexcompiler.RegexOperator.RegexUnionOperator;
import regexcompiler.RegexOperator.RegexJoinOperator;
import regexcompiler.RegexQuantifiableOperator.QuantifierType;
import regexcompiler.RegexQuantifiableOperator.RegexPlusOperator;
import regexcompiler.RegexQuantifiableOperator.RegexQuestionMarkOperator;
import regexcompiler.RegexQuantifiableOperator.RegexStarOperator;
import regexcompiler.RegexSubexpression.SubexpressionType;
import regexcompiler.RegexToken.TokenType;

import analysis.AnalysisSettings.NFAConstruction;

/*
 * Known issue: a counted closure placed directly after another quantifier, e.g. a*{1,2}, is not parsed.
 * In the Java parser the {...} gets ignored in that position: a?{2} matches ε and "a", but not "aa".
 */

public class MyPattern {

	/* The NFA this pattern matches against; it is only assigned in the constructor. */
	private NFAGraph nfaGraph;
	
	/**
	 * Command line entry point.
	 * <p>
	 * With exactly one argument (the pattern) the parse tree and the NFA built with the
	 * {@code NFAConstruction.JAVA} construction are printed. With two or more arguments the
	 * second argument is used as the input string and the outcome of matching it against the
	 * pattern is printed. Without arguments only a usage hint is printed.
	 *
	 * @param args the pattern, optionally followed by an input string
	 */
	public static void main(String [] args) {
		if (args.length < 1) {
			System.out.println("Pattern should be specified as a command line argument (and possibly give an input string).");
			/* Without a pattern there is nothing to do; do not fall through to args[0]. */
			return;
		}
		String pattern = args[0];
		if (args.length < 2) {
			/* Only a pattern was given: show its parse tree and the resulting NFA. */
			Tokeniser t = new Tokeniser(pattern);
			List<RegexToken> tokenList = t.tokenise();
			
			Parser p = new Parser(pattern, tokenList);
			ParseTree parseTree = p.parse();
			System.out.println(parseTree);
			NFAGraph resultGraph = toNFAGraph(pattern, NFAConstruction.JAVA);
			System.out.println(resultGraph);
		} else {
			/* Pattern and input string were given: report whether the input matches. */
			String inputString = args[1];
			MyPattern myPattern = MyPattern.compile(pattern, NFAConstruction.JAVA);
			MyMatcher myMatcher = myPattern.matcher(inputString);
			boolean matches = myMatcher.matches();
			System.out.println(pattern + " matches " + inputString + ": " + matches);
		}
		
	}
	

	/* Largest repetition count; the tokeniser also uses it as the upper bound of an open-ended {n,} closure. */
	private static final int MAX_REPETITION = Integer.MAX_VALUE;

	/**
	 * Wraps an already constructed NFA. Private: instances are obtained through
	 * {@link #compile(String, NFAConstruction)}.
	 *
	 * @param nfaGraph the NFA that matchers created from this pattern will use
	 */
	private MyPattern(NFAGraph nfaGraph) {
		this.nfaGraph = nfaGraph;
	}

	/**
	 * Builds a {@code MyPattern} for the given regex.
	 *
	 * @param pattern the regex to compile
	 * @param construction the NFA construction to use when converting the parse tree
	 * @return a pattern wrapping the NFA produced by {@link #toNFAGraph(String, NFAConstruction)}
	 */
	public static MyPattern compile(String pattern, NFAConstruction construction) {
		return new MyPattern(toNFAGraph(pattern, construction));
	}

	/**
	 * Creates a matcher that runs this pattern's NFA on the given input.
	 *
	 * @param inputString the string to match
	 * @return a new matcher over the NFA and the input string
	 * @throws IllegalStateException if this pattern holds no NFA
	 */
	public MyMatcher matcher(String inputString) {
		if (nfaGraph != null) {
			return new RegexNFAMatcher(nfaGraph, inputString);
		}
		throw new IllegalStateException("Pattern has not yet been compiled!");
	}
	
	/**
	 * Tokenises and parses the regex and converts the resulting parse tree to an NFA.
	 *
	 * @param pattern the regex to convert
	 * @param construction selects the converter: {@code THOMPSON} or {@code JAVA}
	 * @return the NFA produced by the selected converter
	 * @throws RuntimeException if the construction is neither {@code THOMPSON} nor {@code JAVA}
	 */
	public static NFAGraph toNFAGraph(String pattern, NFAConstruction construction) {
		Tokeniser t = new Tokeniser(pattern);
		/* Typed token list instead of a raw List, so element types are checked by the compiler. */
		List<RegexToken> tokenList = t.tokenise();
		Parser p = new Parser(pattern, tokenList);
		ParseTree parseTree = p.parse();
		ParseTreeToNFAConverter pttnc;
		switch (construction) {
		case THOMPSON:
			pttnc = new ThompsonParseTreeToNFAConverter();
			break;
		case JAVA:
			pttnc = new JavaParseTreeToNFAConverter();
			break;
		default:
			throw new RuntimeException("Unknown regex flavour");
		}
		
		return pttnc.convertParseTree(parseTree);
	}

	private static class Tokeniser {

		private final String pattern;
		private final char[] patternArr;
		private final int length;
		private int i;

		private List tokenList;
		private Stack> tokenListStack;
		private Stack groupTypeStack;

		private boolean verbatimMode;

		private Tokeniser(String pattern) {
			this.pattern = pattern;
			this.patternArr = pattern.toCharArray();
			this.length = patternArr.length;
			this.verbatimMode = false;

		}

		private List tokenise() {
			tokenList = new ArrayList();
			tokenListStack = new Stack>();
			groupTypeStack = new Stack();
			i = 0;
			while (true) {
				if (!verbatimMode) {
					switch (patternArr[i]) {
					case '^':
						RegexAnchor lineStartAnchor = new RegexAnchor(RegexAnchorType.LINESTART, i);
						tokenList.add(lineStartAnchor);
						i++;
						break;
					case '$':
						RegexAnchor lineEndAnchor = new RegexAnchor(RegexAnchorType.LINEEND, i);
						tokenList.add(lineEndAnchor);
						i++;
						break;
					case '[':
						RegexCharacterClass rcc = createTokenCharacterClass();
						tokenList.add(rcc);
						i++;
						break;
					case '(':
						RegexGroupType newGroupType = findGroupType();
						groupTypeStack.push(newGroupType);
						/* preserve token list */
						tokenListStack.push(tokenList);
						/* token list for this group */
						tokenList = new ArrayList();
						
						break;
					case ')':
						RegexGroupType currentGroupType = groupTypeStack.pop();
						RegexGroup rg = new RegexGroup(tokenList, currentGroupType, i);
						tokenList = tokenListStack.pop();
						tokenList.add(rg);
						i++;
						break;
					case '\\':
						i++;
						switch (patternArr[i]) {
						case 'Q':
							this.verbatimMode = true;
							i++;
							break;
						default:
							RegexEscapedSymbol res = createTokenEscapedSymbol();
							tokenList.add(res);
						} // End switch

						break;
					case '|':
						RegexUnionOperator unionOperator = new RegexUnionOperator(i);
						tokenList.add(unionOperator);
						i++;
						break;
					case '*':
						RegexStarOperator starOperator = (RegexStarOperator) createQuantifiableOperator(OperatorType.STAR);
						tokenList.add(starOperator);
						break;
					case '+':
						RegexPlusOperator plusOperator = (RegexPlusOperator) createQuantifiableOperator(OperatorType.PLUS);
						tokenList.add(plusOperator);
						break;
					case '?':
						RegexQuestionMarkOperator questionMarkOperator = (RegexQuestionMarkOperator) createQuantifiableOperator(OperatorType.QUESTION_MARK);
						tokenList.add(questionMarkOperator);
						break;
					case '{':
						RegexCountClosureOperator countedClosureOperator;
						QuantifierType countedClosureQuantifier;
						StringBuilder countClosureOperatorBuilder = new StringBuilder();
						try {
							i++;

							while (patternArr[i] != '}') {
								countClosureOperatorBuilder.append(patternArr[i]);
								i++;
							}
							i++;
							if (i < length && patternArr[i] == '?') {
								countedClosureQuantifier = QuantifierType.RELUCTANT;
								i++;
							} else if (i < length && patternArr[i] == '+') {
								countedClosureQuantifier = QuantifierType.POSSESSIVE;
								i++;
							} else {
								countedClosureQuantifier = QuantifierType.GREEDY;
								/* Leave i for unknown token */
							}
						} catch (ArrayIndexOutOfBoundsException aiooe) {
							throw new PatternSyntaxException("Unclosed counted closure", pattern, i);
						}

						String bounds = countClosureOperatorBuilder.toString();
						Pattern boundedPattern = Pattern.compile("(\\d+),(\\d+)");
						Pattern unboundedPattern = Pattern.compile("(\\d+),");
						Pattern constantRepititionPattern = Pattern.compile("(\\d+)");

						Matcher boundedMatcher = boundedPattern.matcher(bounds);
						Matcher unboundedMatcher = unboundedPattern.matcher(bounds);
						Matcher constantRepititionMatcher = constantRepititionPattern.matcher(bounds);
						int low,
						high;
						if (boundedMatcher.find()) {
							String lowStr = boundedMatcher.group(1);
							low = Integer.parseInt(lowStr);
							String highStr = boundedMatcher.group(2);
							high = Integer.parseInt(highStr);

							if (high < low || low < 0 || high > MAX_REPETITION) {
								throw new PatternSyntaxException("Illegal repetition range", pattern, i);
							}

							countedClosureOperator = new RegexCountClosureOperator(low, high, countedClosureQuantifier, i);
							tokenList.add(countedClosureOperator);

						} else if (unboundedMatcher.find()) {
							String lowStr = unboundedMatcher.group(1);
							low = Integer.parseInt(lowStr);

							if (low < 0 || low > MAX_REPETITION) {
								throw new PatternSyntaxException("Illegal repetition range", pattern, i);
							}
							high = MAX_REPETITION;
							countedClosureOperator = new RegexCountClosureOperator(low, high, countedClosureQuantifier, i);
							tokenList.add(countedClosureOperator);

						} else if (constantRepititionMatcher.find()) {
							String lowStr = constantRepititionMatcher.group(1);
							low = Integer.parseInt(lowStr);

							if (low < 0 || low > MAX_REPETITION) {
								throw new PatternSyntaxException("Illegal repetition range", pattern, i);
							}
							high = low;
							countedClosureOperator = new RegexCountClosureOperator(low, high, countedClosureQuantifier, i);
							tokenList.add(countedClosureOperator);
						} else {
							throw new PatternSyntaxException("Illegal repetition range", pattern, i);
						}
						break;
					default:
						RegexSymbol rs = new RegexSymbol("" + patternArr[i], i);
						tokenList.add(rs);
						i++;
					} // End switch
				} else {
					// verbatimMode is true
					if (patternArr[i] == '\\') {
						i++;
						if (i < length && patternArr[i] == 'E') {
							i++;
							verbatimMode = false;
						} else {
							RegexSymbol rs = new RegexSymbol("\\", i);
							tokenList.add(rs);
							rs = new RegexSymbol("" + patternArr[i], i);
							tokenList.add(rs);
							i++;
						}
					} else {
						RegexSymbol rs = new RegexSymbol("" + patternArr[i], i);
						tokenList.add(rs);
						i++;
					}
				} // End if/else

				if (i >= length) {
					break;
				}
			}
			return tokenList;

		}

		/**
		 * Reads a character class, starting on its opening '[', up to the matching unescaped ']'.
		 * Nested brackets are counted; a backslash and the character following it are copied
		 * unchanged. On return {@code i} is the index of the closing ']'.
		 *
		 * @return a token holding the text between the outer brackets
		 * @throws PatternSyntaxException if the pattern ends before the class is closed
		 */
		private RegexCharacterClass createTokenCharacterClass() {
			StringBuilder content = new StringBuilder();
			int openBrackets = 1;
			/* skip the opening '[' */
			i++;
			while (true) {
				if (i >= length) {
					throw new PatternSyntaxException("Unclosed character class", pattern, i);
				}
				char current = patternArr[i];
				if (current == '[') {
					openBrackets++;
				} else if (current == ']' && --openBrackets == 0) {
					break;
				}
				if (current == '\\') {
					/* do not interpret escaped character */
					i++;
					if (i >= length) {
						throw new PatternSyntaxException("Unclosed character class", pattern, i);
					}
					content.append('\\').append(patternArr[i]);
				} else {
					content.append(current);
				}
				i++;
			}
			return new RegexCharacterClass(content.toString(), i);
		}

		/**
		 * Reads the escape sequence whose first character after the backslash is at {@code i}
		 * and leaves {@code i} on the first character after the sequence.
		 * <p>
		 * Handled forms: hexadecimal ('x' followed by two characters or by a braced run), octal
		 * ('0' followed by one to three octal digits with a value of at most 0377), unicode ('u'
		 * followed by four characters), character property ('p' followed by one character or by
		 * a braced name) and a single escaped character. Only the octal form validates its digits.
		 *
		 * @return the token for the escape sequence
		 * @throws PatternSyntaxException if the pattern ends inside the sequence or the octal
		 *         escape has no digit
		 */
		private RegexEscapedSymbol createTokenEscapedSymbol() {
			RegexEscapedSymbol res;
			switch (patternArr[i]) {
			case 'x':
				StringBuilder hexStringBuilder = new StringBuilder();
				try {
					i++;
					if (patternArr[i] == '{') {
						i++;
						while (patternArr[i] != '}') {
							hexStringBuilder.append(patternArr[i]);
							i++;
						}
						i++;
					} else {
						hexStringBuilder.append(patternArr[i]);
						i++;
						hexStringBuilder.append(patternArr[i]);
						i++;
					}
				} catch (ArrayIndexOutOfBoundsException aioobe) {
					throw new PatternSyntaxException("Unclosed hexadecimal escape sequence", pattern, i);
				}
				res = new RegexEscapedSymbol(hexStringBuilder.toString(), RegexEscapedSymbolType.HEX, i);
				break;
			case '0':
				StringBuilder octStringBuilder = new StringBuilder();
				i++;
				/* The bound is checked first so that a pattern ending in the '0' is a syntax error. */
				if (i >= length || patternArr[i] < '0' || patternArr[i] > '7') {
					throw new PatternSyntaxException("Illegal octal escape sequence", pattern, i);
				}
				int octalValue = 0;
				while (i < length && octStringBuilder.length() < 3
						&& '0' <= patternArr[i] && patternArr[i] <= '7') {
					int candidate = octalValue * 8 + (patternArr[i] - '0');
					if (candidate > 0377) {
						/* A third digit that would exceed 0377 is not part of the escape. */
						break;
					}
					octalValue = candidate;
					octStringBuilder.append(patternArr[i]);
					i++;
				}

				res = new RegexEscapedSymbol(octStringBuilder.toString(), RegexEscapedSymbolType.OCTAL, i);
				break;
			case 'u':
				StringBuilder unicodeStringBuilder = new StringBuilder();
				try {
					i++;
					for (int j = 0; j < 4; j++) {
						unicodeStringBuilder.append(patternArr[i]);
						i++;
					}

				} catch (ArrayIndexOutOfBoundsException aioobe) {
					throw new PatternSyntaxException("Illegal unicode escape sequence", pattern, i);
				}

				res = new RegexEscapedSymbol(unicodeStringBuilder.toString(), RegexEscapedSymbolType.UNICODE, i);
				break;
			case 'p':
				StringBuilder characterPropertyBuilder = new StringBuilder();
				try {
					i++;
					if (patternArr[i] == '{') {
						i++;

						while (patternArr[i] != '}') {
							characterPropertyBuilder.append(patternArr[i]);
							i++;
						}
						i++;
					} else {
						characterPropertyBuilder.append(patternArr[i]);
						i++;
					}
				} catch (ArrayIndexOutOfBoundsException aioobe) {
					throw new PatternSyntaxException("Unclosed character family escape sequence", pattern, i);
				}

				res = new RegexEscapedSymbol(characterPropertyBuilder.toString(), RegexEscapedSymbolType.CHARACTER_PROPERTY, i);
				break;
			default:
				String escapedChar;
				try {
					escapedChar = "" + patternArr[i];
					i++;
				} catch (ArrayIndexOutOfBoundsException aioobe) {
					throw new PatternSyntaxException("Unexpected internal error", pattern, i);
				}
				res = new RegexEscapedSymbol(escapedChar, RegexEscapedSymbolType.CHARACTER, i);
				break;
			} // End switch
			return res;
		}

		/**
		 * Builds the token for a '*', '+' or '?' operator located at {@code i}. A directly
		 * following '?' makes it reluctant and a directly following '+' possessive; that
		 * modifier is consumed as well. Any other character is left for the next token.
		 *
		 * @param ot which of the three operators is being read
		 * @return the operator token, carrying the value of {@code i} after the operator
		 * @throws RuntimeException if {@code ot} is not PLUS, QUESTION_MARK or STAR
		 */
		private RegexQuantifiableOperator createQuantifiableOperator(OperatorType ot) {
			/* skip the operator character itself */
			i++;
			QuantifierType mode = QuantifierType.GREEDY;
			if (i < length) {
				char modifier = patternArr[i];
				if (modifier == '?') {
					mode = QuantifierType.RELUCTANT;
					i++;
				} else if (modifier == '+') {
					mode = QuantifierType.POSSESSIVE;
					i++;
				}
			}
			switch (ot) {
			case STAR:
				return new RegexStarOperator(mode, i);
			case PLUS:
				return new RegexPlusOperator(mode, i);
			case QUESTION_MARK:
				return new RegexQuestionMarkOperator(mode, i);
			default:
				throw new RuntimeException("Unkown oeprator: " + ot);
			}
		}
	
		/**
		 * Determines the type of the group whose '(' is at {@code i} and moves {@code i} past
		 * the '(' and, if present, the group prefix ("?:", "?=", "?!", "?<=" or "?<!").
		 *
		 * @return the group type; NORMAL when the '(' is not followed by '?' and at least one
		 *         more character
		 * @throws PatternSyntaxException if the characters after "(?" form none of the prefixes above
		 */
		private RegexGroupType findGroupType() {
			boolean hasPrefix = i < length - 2 && patternArr[i + 1] == '?';
			if (!hasPrefix) {
				/* plain group: only the '(' is consumed */
				i++;
				return RegexGroupType.NORMAL;
			}

			char prefixChar = patternArr[i + 2];
			if (prefixChar == '<') {
				/* check for look behind */
				if (i >= length - 3) {
					throw new PatternSyntaxException("Unkown look-behind group", pattern, i);
				}
				RegexGroupType lookBehindType;
				char direction = patternArr[i + 3];
				if (direction == '=') {
					lookBehindType = RegexGroupType.POSLOOKBEHIND;
				} else if (direction == '!') {
					lookBehindType = RegexGroupType.NEGLOOKBEHIND;
				} else {
					throw new PatternSyntaxException("Unkown look-behind group", pattern, i);
				}
				i += 4;
				return lookBehindType;
			}

			RegexGroupType prefixedType;
			if (prefixChar == ':') {
				prefixedType = RegexGroupType.NONCAPTURING;
			} else if (prefixChar == '=') {
				prefixedType = RegexGroupType.POSLOOKAHEAD;
			} else if (prefixChar == '!') {
				prefixedType = RegexGroupType.NEGLOOKAHEAD;
			} else {
				throw new PatternSyntaxException("Unkown inline modifier", pattern, i);
			}
			i += 3;
			return prefixedType;
		}
	}

	private static class Parser {
		
		/* The full regex; passed along when a syntax error is reported. */
		private final String pattern;
		/* The tokens this parser works on: the whole regex or the content of one group. */
		private final List<RegexToken> tokenList;
		
		/* Typed iterator, so that next() yields a RegexToken without a cast. */
		private Iterator<RegexToken> tokenIterator;
		/* The token under inspection; null once the stream is exhausted. */
		private RegexToken currentToken;
		/* Index reported by the most recently read token. */
		private int index;
		private boolean endOfStream;
		/* Set by the constructors; a leading '^' and a trailing '$' are only accepted when false. */
		private boolean isNested;
		
		/**
		 * Advances to the next token.
		 *
		 * @return true if a token was read; false if the stream is exhausted, in which case
		 *         {@code currentToken} is null and {@code endOfStream} is set
		 */
		private boolean nextToken() {
			if (!tokenIterator.hasNext()) {
				currentToken = null;
				endOfStream = true;
				return false;
			}
			currentToken = tokenIterator.next();
			index = currentToken.getIndex();
			return true;
		}
		
		/**
		 * Creates a parser for a complete (top level) regex.
		 *
		 * @param pattern the regex text, used when reporting syntax errors
		 * @param tokenList the tokens of the regex
		 */
		public Parser(String pattern, List<RegexToken> tokenList) {
			this(false, pattern, tokenList);
		}
		
		/**
		 * Creates a parser for the top level or for the content of a group.
		 *
		 * @param isNested true when the tokens are the content of a group
		 * @param pattern the regex text, used when reporting syntax errors
		 * @param tokenList the tokens to parse
		 */
		private Parser(boolean isNested, String pattern, List<RegexToken> tokenList) {
			/* honour the argument instead of hard-coding true */
			this.isNested = isNested;
			this.pattern = pattern;
			this.tokenList = tokenList;
		}
		
		/**
		 * Parses the token list from its beginning.
		 *
		 * @return a parse tree whose root is the node returned by {@link #parseRegex()}
		 */
		public ParseTree parse() {
			endOfStream = false;
			tokenIterator = tokenList.iterator();
			/* load the first token before descending */
			nextToken();
			TreeNode regexNode = parseRegex();

			ParseTree tree = new ParseTree();
			tree.newRoot(regexNode);
			return tree;
		}
		
		/**
		 * Parses a regex: terms separated by binary operators, optionally preceded by a line
		 * start anchor and followed by a line end anchor (both only outside groups).
		 * <p>
		 * An empty token stream, or one holding only the leading anchor, yields a node for the
		 * empty symbol. A binary operator that ends the stream gets an empty symbol as its
		 * right operand.
		 *
		 * @return the root node of the parsed regex
		 * @throws UnimplementedFunctionalityException if an anchor occurs at an unsupported position
		 * @throws PatternSyntaxException if tokens remain that cannot be part of the regex
		 */
		public TreeNode parseRegex() {
			//System.out.println("Parse Regex");
			if (!endOfStream && currentToken.getTokenType() == TokenType.ANCHOR) {
				RegexAnchor regexAnchorToken = (RegexAnchor) currentToken;
				if (regexAnchorToken.getAnchorType() == RegexAnchorType.LINESTART && !isNested) {
					nextToken();
					/* Since we assume line based matching, we ignore the caret at the start */
				} else {
					throw new UnimplementedFunctionalityException("Anchor at invalid position: " + regexAnchorToken.getAnchorType() + " at " + regexAnchorToken.getIndex());
				}
			}
			if (endOfStream) {
				/* Nothing (left) to parse: currentToken is null here, so return the empty factor. */
				return new TreeNode(new RegexSymbol("", index));
			}
			TreeNode root = parseTerm();
			while (checkEndOfTerm()) {
				TreeNode operatorNode = new TreeNode(currentToken);
				operatorNode.addChild(root);
				TreeNode nextTermNode;
				if (nextToken()) {
					nextTermNode = parseTerm();
				} else {
					/* Operator at the very end: its right operand is the empty factor. */
					nextTermNode = new TreeNode(new RegexSymbol("", index));
				}
				operatorNode.addChild(nextTermNode);
				root = operatorNode;
			}
			if (!endOfStream && currentToken.getTokenType() == TokenType.ANCHOR) {
				RegexAnchor regexAnchorToken = (RegexAnchor) currentToken;
				nextToken();
				if (regexAnchorToken.getAnchorType() == RegexAnchorType.LINEEND && !isNested) {
					/* Since we assume line based matching, we ignore the dollar at the line end */
				} else {
					throw new UnimplementedFunctionalityException("Anchor at invalid position: " + regexAnchorToken.getAnchorType() + " at " + regexAnchorToken.getIndex());
				}
			}
			if (!endOfStream) {
				throw new PatternSyntaxException("Dangling meta character '" + currentToken + "'", pattern, currentToken.getIndex());
			}
			//System.out.println("END Parse Regex");
			return root;
			
		}
		
		/**
		 * Parses a term: one or more consecutive factors, combined left to right under join
		 * operator nodes. If the current token is a binary operator the term is empty and a
		 * node for the empty symbol is returned without consuming anything.
		 *
		 * @return the root node of the term
		 * @throws PatternSyntaxException if the term starts with a non-binary operator
		 * @throws UnimplementedFunctionalityException if the term starts with an anchor
		 * @throws RuntimeException if the current token has an unknown type
		 */
		public TreeNode parseTerm() {
			TokenType firstType = currentToken.getTokenType();
			if (firstType == TokenType.OPERATOR) {
				RegexOperator operatorToken = (RegexOperator) currentToken;
				if (!operatorToken.getIsBinaryOperator()) {
					throw new PatternSyntaxException("Dangling meta character '" + operatorToken + "'", pattern, operatorToken.getIndex());
				}
				/* empty factor */
				return new TreeNode(new RegexSymbol("", index));
			}
			if (firstType == TokenType.ANCHOR) {
				RegexAnchor regexAnchorToken = (RegexAnchor) currentToken;
				throw new UnimplementedFunctionalityException("Anchor at invalid position: " + regexAnchorToken.getAnchorType() + " at " + regexAnchorToken.getIndex());
			}
			if (firstType != TokenType.SUBEXPRESSION) {
				throw new RuntimeException("Unknown token type: " + firstType);
			}

			TreeNode term = parseFactor();
			while (!endOfStream && currentToken.getTokenType() == TokenType.SUBEXPRESSION) {
				TreeNode factor = parseFactor();
				/* TODO implement lookaround here */
				TreeNode join = new TreeNode(new RegexJoinOperator(index));
				join.addChild(term);
				join.addChild(factor);
				term = join;
			}
			return term;
		}
		
		/**
		 * Parses a factor: a single subexpression, optionally followed by one unary operator.
		 * The content of a group is parsed by a nested parser and hung below a node for the
		 * group token. If the current token is a binary operator instead, a node for the empty
		 * symbol is returned.
		 *
		 * @return the root node of the factor
		 * @throws UnimplementedFunctionalityException if the current token is an anchor
		 * @throws PatternSyntaxException if the current token is a non-binary operator
		 * @throws RuntimeException if a group has an unknown type
		 */
		public TreeNode parseFactor() {
			TokenType factorType = currentToken.getTokenType();
			if (factorType == TokenType.ANCHOR) {
				RegexAnchor regexAnchorToken = (RegexAnchor) currentToken;
				throw new UnimplementedFunctionalityException("Anchor at invalid position: " + regexAnchorToken.getAnchorType() + " at " + regexAnchorToken.getIndex());
			}
			if (factorType != TokenType.SUBEXPRESSION) {
				if (!checkEndOfTerm()) {
					/* error dangling meta character */
					throw new PatternSyntaxException("Dangling meta character", pattern, index);
				}
				/* return empty factor */
				return new TreeNode(new RegexSymbol("", index));
			}

			TreeNode root;
			RegexSubexpression subexpressionToken = (RegexSubexpression) currentToken;
			if (subexpressionToken.getSubexpressionType() == SubexpressionType.GROUP) {
				RegexGroup groupToken = (RegexGroup) subexpressionToken;
				RegexGroupType groupTokenType = groupToken.getGroupType();
				/* All known group types are parsed the same way; only the group token differs. */
				switch (groupTokenType) {
				case NORMAL:
				case NONCAPTURING:
				case POSLOOKAHEAD:
				case NEGLOOKAHEAD:
				case POSLOOKBEHIND:
				case NEGLOOKBEHIND: {
					Parser nestedParser = new Parser(true, pattern, groupToken.getSubexpressionContent());
					ParseTree nestedTree = nestedParser.parse();
					/* We add a group node so that the info on which type of group (?: or ?<= Etc) does not go missing  */
					TreeNode groupNode = new TreeNode(groupToken);
					groupNode.addChild(nestedTree.getRoot());
					root = groupNode;
					break;
				}
				default:
					throw new RuntimeException("Unknown group type: " + groupTokenType);
				}
			} else {
				root = new TreeNode(currentToken);
			}

			/* search for unary operator; anything else is left for the caller */
			if (nextToken() && currentToken.getTokenType() == TokenType.OPERATOR) {
				RegexOperator followingOperator = (RegexOperator) currentToken;
				if (!followingOperator.getIsBinaryOperator()) {
					TreeNode unaryOperatorNode = new TreeNode(followingOperator);
					unaryOperatorNode.addChild(root);
					root = unaryOperatorNode;
					nextToken();
				}
			}
			return root;
		}
		
		/**
		 * Tells whether the current token ends a term.
		 *
		 * @return true if tokens remain and the current one is a binary operator
		 */
		private boolean checkEndOfTerm() {
			if (endOfStream) {
				return false;
			}
			if (currentToken.getTokenType() != TokenType.OPERATOR) {
				return false;
			}
			return ((RegexOperator) currentToken).getIsBinaryOperator();
		}
		
	}

	/*
	 * Matcher type returned by MyPattern.matcher(String). It declares nothing beyond a
	 * private constructor that forwards its arguments to the NFAMatcher constructor.
	 */
	static class RegexNFAMatcher extends NFAMatcher {
		private RegexNFAMatcher(NFAGraph nfaGraph, String inputString) {
			super(nfaGraph, inputString);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy