// info.codesaway.util.regex.Refactor (from the regexplus artifact)
// NOTE: the two lines originally here were web-page navigation text, not Java source.
package info.codesaway.util.regex;
import static info.codesaway.util.regex.Matcher.getAbsoluteGroupIndex;
import static info.codesaway.util.regex.Pattern.CASE_INSENSITIVE;
import static info.codesaway.util.regex.Pattern.COMMENTS;
import static info.codesaway.util.regex.Pattern.DOTALL;
import static info.codesaway.util.regex.Pattern.DOTNET_NUMBERING;
import static info.codesaway.util.regex.Pattern.DUPLICATE_NAMES;
import static info.codesaway.util.regex.Pattern.EXPLICIT_CAPTURE;
import static info.codesaway.util.regex.Pattern.MULTILINE;
import static info.codesaway.util.regex.Pattern.PERL_OCTAL;
import static info.codesaway.util.regex.Pattern.UNICODE_CASE;
import static info.codesaway.util.regex.Pattern.UNICODE_CHARACTER_CLASS;
import static info.codesaway.util.regex.Pattern.UNIX_LINES;
import static info.codesaway.util.regex.Pattern.VERIFY_GROUPS;
import static info.codesaway.util.regex.Pattern.getMappingName;
import static info.codesaway.util.regex.Pattern.naturalCompareTo;
import static info.codesaway.util.regex.Pattern.wrapIndex;
import static info.codesaway.util.regex.RefactorUtility.anyGroupName;
import static info.codesaway.util.regex.RefactorUtility.fail;
import static info.codesaway.util.regex.RefactorUtility.getDigitCountPattern;
import static info.codesaway.util.regex.RefactorUtility.hexCodeFormat;
import static info.codesaway.util.regex.RefactorUtility.isAnyGroup;
import static info.codesaway.util.regex.RefactorUtility.neverUsedMappingName;
import static info.codesaway.util.regex.RefactorUtility.nonCaptureGroup;
import static info.codesaway.util.regex.RefactorUtility.parseInt;
import static info.codesaway.util.regex.RefactorUtility.perl_octal;
import static info.codesaway.util.regex.RefactorUtility.posixClasses;
import static info.codesaway.util.regex.RefactorUtility.startNonCaptureGroup;
import static info.codesaway.util.regex.RefactorUtility.unicodeFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import info.codesaway.util.Differences;
/**
* Class used to refactor a regular expression with "advanced" patternSyntax
* to an equivalent form usable in Java and other RegEx engines.
*
*
* Depending on the flags set when compiling the pattern, additional
* simplifications may be performed. Note these additional simplifications
* are not required to use the pattern in Java. Rather, they are intended to
* refactor the regular expression to a form usable by "any" RegEx engine
* (including "basic" ones found in text editors, renamers, and other
* software).
*
*/
class Refactor {
/** The pattern. */
private final Pattern pattern;
/** The regular expression to refactor. */
private final String regex;
// Refactor parent object
// (for cases like subroutines)
// private final Refactor refactor;
/** the subpattern */
private final Subpattern subpattern;
/*
* These values are used during the refactoring process, and were made
* class members to reduce the need to pass them as parameters to the
* different functions called during the refactoring process.
*
* Note: these values may change. The value is based on the current step
* of the refactoring process and how much the string has been parsed
* during that step.
*/
/** The text for the matcher in the refactoring step **/
private String text;
/**
* The flags
*/
private int flags;
/** Used to match the different parts to refactor. */
private java.util.regex.Matcher matcher;
/**
* Contains the current result of the refactoring process.
*
* Note: this is the StringBuffer used when calling
* {@link java.util.regex.Matcher#appendReplacement(StringBuffer, String)} and
* {@link java.util.regex.Matcher#appendTail(StringBuffer)}
*/
private StringBuffer result;
/**
* Stores the matching String for matcher - the return value of {@link java.util.regex.Matcher#group()}
*/
private String match;
/** The number of unclosed opening parenthesis. */
private int parenthesisDepth;
/** The number of unclosed opening square brackets. */
private int charClassDepth;
/**
* Used to track if currently in a curly brace '{' through '}'
*
* Use <code>null</code> to represent that no further tracking should be performed (such as if there are two open
* curly braces in a row, which is never allowed)
*/
private Boolean isInCurlyBrace = false;
/**
* Used to track if currently in a comments block
*
* Starts with "#" when {@link Pattern#COMMENTS} is enabled
*/
private boolean isInComments = false;
/**
* The total number of capture groups (includes capturing groups added
* as part of the refactoring).
*/
private int totalGroups;
/**
* Total number of unnamed groups
*/
private int unnamedGroupCount;
/**
* The number of capture groups (excludes capture groups added as part
* of the refactoring).
*/
private int currentGroup;
/**
* The current named group (starts at 1)
*/
private int namedGroup;
/**
* The current unnamed group (starts at 1)
*/
private int unnamedGroup;
/** The index for the first non-null group. */
private int group;
/**
* A list with each index the mapping name for the respective perl group.
*
* Index 0 in the list refers to group 1
* (only used if {@link Pattern#DOTNET_NUMBERING} is set).
*/
private HashMap perlGroupMapping;
/**
* A map with mappings from the mapping name of a capture group to the
* group index of the empty capture group used to test whether the group
* matched or not.
*/
private final Map testConditionGroups = new HashMap<>(2);
/**
* Set of group names that have an "any group" back reference
*/
private final Set anyGroupReferences = new HashSet<>(2);
/** The capture groups that require a testing group. */
private final TreeSet requiresTestingGroup = new TreeSet<>();
/**
* A stack of states used to track when a testing group should be added
* to a capture group.
*/
private final Stack addTestGroup = new Stack<>();
/**
* A stack of states used to track the else branch of a conditional
* subpattern.
*/
private final Stack handleElseBranch = new Stack<>();
/**
* A stack of states used to track the end of the assertion in an assert
* conditional
*/
private final Stack handleEndAssertCond = new Stack<>();
/**
* A stack of states used to track the branches in a branch reset
* subpattern.
*/
private final Stack branchReset = new Stack<>();
/**
* A stack used to store the current flags.
*
*
* When entering a new parenthesis grouping, the current flags are
* saved. This value is later restored upon exiting the parenthesis
* grouping.
*
*/
private final Stack flagsStack = new Stack<>();
/**
* A map with mappings from group name to the group count for
* that group name.
*/
private Map groupCounts = new HashMap<>(2);
/**
* The differences (insertions, deletions, and replacements) made to the
* original regular expression during the latest refactoring step.
*
* After each step, these differences are added to the list of
* changes.
*/
Differences differences = new Differences();
/**
* The differences (insertions, deletions, and replacements) made to the
* original regular expression during the refactoring process.
*
* If the refactored regular expression throws a {@link PatternSyntaxException}, this field is used to map
* the index
* for the exception to its respective index in the original regular
* expression.
*/
Differences changes = new Differences();
/**
* List of changes performed during the pre-refactoring step
*/
private final Differences preRefactoringChanges;
/**
* Mapping from integer (incremental) to index for error
*/
private Map errorTrace;
/**
* Indicates if the current VM is Java 1.5
*/
private final boolean isJava1_5;
/** The subpatterns. */
private final Map subpatterns;
/** The open subpatterns. */
private final Map openSubpatterns = new HashMap<>();
/**
* Used to differentiate between named and unnamed capture groups, back
* references, etc. during refactoring. Different steps may be taken
* depending if a name or number is used.
*/
private static final boolean FOR_NAME = true;
/** Used to specify that the operation occurred during the prerefactor step. */
private static final boolean DURING_PREREFACTOR = true;
/** Used as the mapped index for a group whose target is unknown. */
static final int TARGET_UNKNOWN = -1;
static final String newLine = System.getProperty("line.separator");
/* Error messages */
static final String ASSERTION_EXPECTED = "Assertion expected after (?(";
static final String CONDITIONAL_BRANCHES = "Conditional group contains more than two branches";
static final String DUPLICATE_NAME = "Two named subpatterns have the same name";
static final String ILLEGAL_OCTAL_ESCAPE = "Illegal octal escape sequence";
static final String INTERNAL_ERROR = "An unexpected internal error has occurred";
static final String INVALID_BASE = "Invalid base";
static final String INVALID_CONDITION0 = "Invalid condition (?(0)";
static final String INVALID_DIGIT_END = "Invalid digit in end of range";
static final String INVALID_DIGIT_START = "Invalid digit in start of range";
/**
* Uses error message from Java 1.5
*/
static final String INVALID_FORWARD_REFERENCE = "No such group yet exists at this point in the pattern";
static final String INVALID_HEX_CODE = "Character value in \\x{...} sequence is too large";
static final String MISSING_TERMINATOR = "Missing terminator for subpattern name";
static final String NONEXISTENT_SUBPATTERN = "Reference to non-existent subpattern";
static final String NUMERIC_RANGE_EXPECTED = "Numeric range expected";
static final String POSIX_OUTSIDE_CLASS = "POSIX class outside of character class";
static final String SUBPATTERN_NAME_EXPECTED = "Subpattern name expected";
static final String UNCLOSED_COMMENT = "Missing closing ')' after comment";
static final String UNCLOSED_GROUP = "Unclosed group";
static final String UNCLOSED_RANGE = "Missing closing ']' after numeric range";
static final String UNKNOWN_POSIX_CLASS = "Unknown POSIX class name";
static final String UNMATCHED_PARENTHESES = "Unmatched closing ')'";
static final String ZERO_REFERENCE = "A numbered reference is zero";
static final String INVALID_SUBROUTINE = "Subroutine contains unsupported syntax for a subroutine";
static final String CIRCULAR_SUBROUTINE = "Subroutine cannot be circular (have a subroutine which depends on itself)";
/**
* Resets the necessary values before performing the next step of the
* refactoring.
*/
void reset() {
    // Start the step from the initial flags (the subpattern's flags inside a
    // subroutine, otherwise the pattern's flags).
    this.flags = this.initializeFlags();

    // Position-dependent counters and the output buffer start over for each step.
    this.parenthesisDepth = 0;
    this.charClassDepth = 0;
    this.currentGroup = 0;
    this.totalGroups = 0;
    this.namedGroup = 0;
    this.unnamedGroup = 0;
    this.isInComments = false;
    this.result = new StringBuffer();

    // Drop whatever the previous step left on the tracking stacks.
    this.addTestGroup.clear();
    this.handleElseBranch.clear();
    // Previously not cleared: without this, entries left over from the previous
    // step would survive while parenthesisDepth restarts at 0.
    this.handleEndAssertCond.clear();
    this.branchReset.clear();
    this.flagsStack.clear();

    // A subroutine continues from the group counts recorded on its subpattern;
    // a top-level refactoring counts from scratch.
    if (this.inSubroutine()) {
        this.groupCounts = this.subpattern.getGroupCounts();
    } else {
        this.groupCounts.clear();
    }

    // Fold the differences of the step that just finished into the overall
    // change list, then start a fresh list for the next step.
    this.changes.addAll(this.differences);
    this.differences = new Differences();
}
/**
* Internal method used for handling all patternSyntax errors. The
* pattern is
* displayed with a pointer to aid in locating the patternSyntax error.
*
* @param errorMessage
* the error message
* @param index
* The approximate index in the pattern of the error, or -1
* if the index is not known
* @return a <code>PatternSyntaxException</code> with the given error
* message, index, and using the original regex
*/
// private PatternSyntaxException error(PatternErrorMessage
// errorMessage,
// int index)
private PatternSyntaxException error(final String errorMessage, final int index) {
// Convenience overload: same as the three-argument form with no additional details (null).
return this.error(errorMessage, index, null);
}
/**
* Internal method used for handling all patternSyntax errors. The
* pattern is displayed with a pointer to aid in locating the patternSyntax error.
*
* @param errorMessage
* the error message
* @param index
* The approximate index in the pattern of the error, or -1
* if the index is not known
* @param additionalDetails
* the additional details (or <code>null</code> if there are none)
* @return a <code>PatternSyntaxException</code> with the given error
* message, index, and using the original regex
*/
private PatternSyntaxException error(final String errorMessage, final int index, final String additionalDetails) {
    // Map the index back through the recorded changes to a position in the
    // original regex; -1 is reported when the index cannot be mapped.
    int mappedIndex = -1;
    try {
        mappedIndex = this.changes.getOriginalIndex(index);
    } catch (IllegalArgumentException ignored) {
        // index rejected by the change list - keep -1
    }
    return new PatternSyntaxException(errorMessage, this.regex, mappedIndex, additionalDetails);
}
/**
* Creates a new Refactor object and refactors the given regular
* expression so that it can be compiled by the original Pattern class.
*
* @param pattern
* the pattern whose regular expression ({@link Pattern#pattern()}) is refactored
*/
// Refactor(String regex)
Refactor(final Pattern pattern) {
// Top-level refactoring: the regex comes from the pattern itself and there is no subpattern.
this(pattern, pattern.pattern(), null);
}
Refactor(final Subpattern subpattern, final String regex) {
// Subroutine refactoring: the given regex is refactored against the subpattern's parent pattern.
this(subpattern.getParentPattern(), regex, subpattern);
}
private Refactor(final Pattern pattern, final String regex, final Subpattern subpattern) {
    this.pattern = pattern;
    this.regex = regex;
    this.subpattern = subpattern;
    this.flags = this.initializeFlags();

    // A subroutine shares the subpattern map of its parent; a top-level
    // refactoring starts with an empty one.
    if (subpattern == null) {
        this.subpatterns = new HashMap<>();
    } else {
        this.subpatterns = subpattern.getParentSubpatterns();
    }

    // "Java 1.5" means a version in the range [1.5.0, 1.6.0).
    final String vmVersion = System.getProperty("java.version");
    final boolean atLeast1_5 = naturalCompareTo(vmVersion, "1.5.0") >= 0;
    this.isJava1_5 = atLeast1_5 && naturalCompareTo(vmVersion, "1.6.0") < 0;

    // Run the three steps in order. The Differences instance in use during the
    // pre-refactoring step is captured before refactor() calls reset(), which
    // replaces this.differences with a new instance.
    this.result = new StringBuffer();
    this.preRefactor();
    this.preRefactoringChanges = this.differences;
    this.refactor();
    this.afterRefactor();
}
private int initializeFlags() {
    // Without a subpattern the pattern's flags apply; otherwise the subpattern's own flags.
    if (this.subpattern == null) {
        return this.pattern.flags();
    }
    return this.subpattern.getFlags();
}
/**
* Returns the current result of the refactoring.
*/
@Override
public String toString() {
// The contents of the result buffer as they stand when this is called.
return this.result.toString();
}
/**
* Gets the subpatterns.
*
* @return the subpatterns
*/
Map getSubpatterns() {
// Returns the map itself (not a copy); for a subroutine this is the map obtained from
// subpattern.getParentSubpatterns() in the constructor.
return this.subpatterns;
}
/**
* Indicates whether the refactoring is being done within a subroutine
*
* @return <code>true</code> if currently refactoring a subroutine
*/
private boolean inSubroutine() {
// Only the Refactor(Subpattern, String) constructor supplies a subpattern;
// Refactor(Pattern) passes null.
return this.subpattern != null;
}
/**
* Gets the pattern.
*
* @return the pattern
*/
Pattern getPattern() {
// The pattern given to the constructor (the parent pattern when refactoring a subroutine).
return this.pattern;
}
/**
* Indicates whether a particular flag is set or not.
*
* @param f
* the flag to test
* @return <code>true</code> if the particular flag is set
*/
boolean has(final int f) {
    // True when at least one bit of f is also set in the current flags.
    final int sharedBits = this.flags & f;
    return sharedBits != 0;
}
/**
* Returns the group index for the first non-null group.
*
* @param matcher
* the matcher
* @return the group index for the first non-null group
*/
static int getUsedGroup(final java.util.regex.Matcher matcher) {
    // Group 0 is the whole match, so the scan starts at group 1.
    final int lastGroup = matcher.groupCount();
    int candidate = 1;
    while (candidate <= lastGroup) {
        // start(n) is -1 for a group that did not take part in the match
        if (matcher.start(candidate) >= 0) {
            return candidate;
        }
        candidate++;
    }
    // no capture group took part in the match
    return 0;
}
/**
*
* Used in the loop that matches parts for the current refactoring step.
*
*
* Sets {@link #match} to the matched string, and sets {@link #group} to the index for the first non-null
* capture group.
*
* @return true if the current loop should be skipped
*/
private boolean loopSetup() {
    this.match = this.matcher.group();
    this.group = getUsedGroup(this.matcher);

    // Outside a comment every match is processed.
    if (!this.isInComments) {
        return false;
    }

    final String current = this.match;

    // Inside a comment only three kinds of match are still processed:
    // a "(?#" comment group found outside a character class ...
    if (current.startsWith("(?#") && !this.inCharClass()) {
        return false;
    }
    // ... the start of a quote block ...
    if (current.startsWith("\\Q")) {
        return false;
    }
    // ... and a single line terminator character.
    final boolean lineTerminator = current.equals("\n") || current.equals("\r") || current.equals("\u0085")
            || current.equals("\u2028") || current.equals("\u2029");

    // Everything else found inside a comment is skipped.
    return !lineTerminator;
}
/**
* Sets the number of capture groups.
*
* @param capturingGroupCount
* the number of capture groups
*/
void setCapturingGroupCount(final int capturingGroupCount) {
    // Only a top-level refactoring writes to the pattern; inside a subroutine this is a no-op.
    if (this.inSubroutine()) {
        return;
    }
    this.pattern.setCapturingGroupCount(capturingGroupCount);
}
/**
* @param addedGroups
* @since 0.2
*/
void setAddedGroups(final boolean addedGroups) {
    // Only a top-level refactoring writes to the pattern; inside a subroutine this is a no-op.
    if (this.inSubroutine()) {
        return;
    }
    this.pattern.setAddedGroups(addedGroups);
}
/**
* Sets the group name counts.
*
* @param groupNameCounts
* the group name counts
*/
void setGroupNameCounts(final Map groupNameCounts) {
    // Only a top-level refactoring writes to the pattern; inside a subroutine this is a no-op.
    if (this.inSubroutine()) {
        return;
    }
    this.pattern.setGroupCounts(groupNameCounts);
}
/**
* Steps taken to prepare the regular expression to be refactored
*/
private void preRefactor() {
// Pre-refactoring step. Scans the original regex with RefactorUtility.preRefactor and
// dispatches every match, by the index of its first participating capture group
// (this.group, set in loopSetup), to the corresponding handler. After the scan the
// group counts gathered along the way are handed to the set* methods and the
// unmatched tail of the regex is appended to the result.
this.text = this.regex;
this.matcher = RefactorUtility.preRefactor.matcher(this.regex);
// add a map from group 0 to group 0
// TODO: add occurrence??
this.addUnnamedGroup("[0][1]", 0);
this.setGroupCount("[0]", 1);
while (this.matcher.find()) {
if (this.loopSetup()) {
continue;
}
if (this.group == 1) {
/*
* matches
* "(?onFlags-offFlags)" or "(?onFlags-offFlags:"
* (also matches a non-capture group
* - onFlags/offFlags are omitted)
*
* group: onFlags (empty string if none)
* group + 1: offFlags
* (empty string if none; null, if omitted)
*
* (2 groups)
*/
if (!this.inCharClass()) {
this.preRefactorFlags();
}
} else if (this.group >= 3 && this.group <= 8) {
/*
* matches a named capture group
* "(?<name>" (form 0)
* "(?'name'" (form 1)
* "(?P<name>" (form 2)
*
* group: everything after first symbol
* group + 1: the name
* (6 groups)
*/
if (!this.inCharClass()) {
int form = (this.group - 3) / 2;
this.preRefactorCaptureGroup(FOR_NAME, form);
}
} else if (this.group == 9) {
/*
* matches an unnamed capture group "("
* - not followed by a "?" (or a '*', used by verbs)
*
* group: everything (1 group)
*/
if (!this.inCharClass()) {
int form = this.group - 9;
this.preRefactorCaptureGroup(!FOR_NAME, form);
}
} else if (this.group >= 10 && this.group <= 14) {
/*
* matches a back reference (by name)
* "\g{name}" (form 0)
* "\k<name>" (form 1)
* "\k'name'" (form 2)
* "\k{name}" (form 3)
* "(?P=name)" (form 4)
*
* group : the name
* ( can only be an "any group" (e.g. groupName[0]))
* (5 groups)
*/
if (!this.inCharClass()) {
this.preRefactorBackreference();
}
} else if (this.group == 15) {
/*
* matches an assert condition
* "(?(?=)", "(?(?!)", "(?(?<=)", or (presumably) "(?(?<!)"
*
* NOTE(review): the source text is truncated at this point. The rest of this
* comment, the body of the "group == 15" branch, and the opening of the branch
* for the named reference condition are missing; the only surviving fragment
* reads "= 17 && this.group <= 25) {". As the text stands, the
* reference-condition code below is attached to the "group == 15" test.
* Restore this section from the upstream source - TODO confirm.
*
* matches a reference condition (by name)
* "(?(<name>)" (form 0),
* "(?('name')" (form 1), or
* "(?(name)" (form 2)
*
* group: everything after first symbol (excluding ")")
* group + 1: the name
* group + 2: the occurrence (if specified)
* (9 groups)
*/
if (!this.inCharClass()) {
int form = (this.group - 17) / 3;
this.preRefactorConditionalPattern(FOR_NAME, form);
}
} else if (this.group == 26) {
/*
* matches comment group
* "(?#comment) - comment cannot contain ")"
*
* group: everything
* (1 group)
*/
if (!this.inCharClass()) {
if (!this.match.endsWith(")")) {
throw this.error(UNCLOSED_COMMENT, this.matcher.end());
}
if (this.isInComments) {
this.checkForLineTerminator();
}
// remove (in internal pattern)
this.replaceWith("");
}
} else if (this.group == 27) {
/*
* matches a "branch reset" subpattern "(?|"
*
* group: everything
* (1 group)
*/
if (!this.inCharClass()) {
this.preRefactorBranchReset();
}
} else if (this.group == 28) {
/*
* FAIL verb (from PCRE) - always fails
* (*FAIL) or (*F) - case sensitive
*
* synonym for (?!)
*/
if (!this.inCharClass()) {
this.replaceWith(fail());
}
} else {
this.preRefactorOthers();
}
}
// Publish the totals collected during the scan.
this.unnamedGroupCount = this.unnamedGroup;
this.setCapturingGroupCount(this.currentGroup);
this.setGroupCount("", this.currentGroup);
// groups were added by the refactoring when the two counters differ
this.setAddedGroups(this.totalGroups != this.currentGroup);
if (this.has(DOTNET_NUMBERING)) {
// for each named group, mark the group count as 1
// for its respective number group
// (doesn't affect named groups in "branch reset" patterns)
for (int i = 1; i <= this.namedGroup; i++) {
int groupIndex = this.unnamedGroupCount + i;
this.setGroupCount("[" + groupIndex + "]", 1);
}
int currentNamedGroup = 0;
for (Entry entry : this.perlGroupMapping().entrySet()) {
String mappingName = entry.getValue();
if (mappingName.charAt(0) != '[') {
// a named group - using mapping name of unnamed group
currentNamedGroup++;
String groupName = wrapIndex(this.unnamedGroupCount + currentNamedGroup);
mappingName = getMappingName(groupName, 0);
entry.setValue(mappingName);
}
}
}
// pass a copy of the counts, not the live map
this.setGroupNameCounts(new HashMap<>(this.groupCounts));
// an "any group" reference to a name whose group count is not exactly 1
// is registered as requiring a testing group
for (String groupName : this.anyGroupReferences) {
// if (groupCount(groupName) != 1)
// System.out.println("getGroupCount: " + groupName);
if (this.getGroupCount(groupName) != 1) {
this.requiresTestingGroup.add(getMappingName(groupName, 0));
}
}
this.matcher.appendTail(this.result);
}
/**
* Refactors the regular expression
*/
private void refactor() {
// Main refactoring step. Scans the output of the previous step with
// RefactorUtility.refactor and dispatches every match, by the index of its first
// participating capture group (this.group, set in loopSetup), to the corresponding
// handler. Throws UNCLOSED_GROUP if parentheses are still open after the scan.
this.text = this.result.toString();
// matcher = getRefactorPattern().matcher(result);
// The matcher is created over the previous result buffer before reset() replaces
// this.result with a new, empty buffer.
this.matcher = RefactorUtility.refactor.matcher(this.result);
this.reset();
while (this.matcher.find()) {
if (this.loopSetup()) {
continue;
}
// System.out.println("group: " + group + "\t" + matcher.group());
if (this.group == 1) {
/*
* matches an unnamed subroutine reference
* "(?[-+]n)" (form 0)
*
* group: number (including [-+] to make it relative)
* (1 group)
*/
// System.out.println("Subroutine: " + matcher.group(group));
if (!this.inCharClass()) {
this.refactorSubroutine(!FOR_NAME);
}
} else if (this.group == 2) {
/*
* matches a named subroutine reference
* "(?&group)" (form 0)
* "(?P>group)" (form 1)
*
* group: group name
* group + 1: occurrence
* (2 group)
*/
if (!this.inCharClass()) {
this.refactorSubroutine(FOR_NAME);
}
} else if (this.group == 4) {
/*
* matches "(?onFlags-offFlags)" or "(?onFlags-offFlags:"
* (also matches a non-capture group
* - onFlags/offFlags are omitted)
*
* group: onFlags (empty string if none)
* group + 1: offFlags
* (empty string if none; null, if omitted)
* (2 groups)
*/
if (!this.inCharClass()) {
this.refactorFlags();
}
} else if (this.group >= 6 && this.group <= 8) {
/*
* matches a named capture group
* "(?<name>" (form 0)
* "(?'name'" (form 1)
* "(?P<name>" (form 2)
*
* group: the name
* (3 groups)
*/
if (!this.inCharClass()) {
this.refactorCaptureGroup(FOR_NAME);
}
} else if (this.group == 9) {
/*
* matches an unnamed capture group
* "(" - not followed by a "?"
*
* group: everything
* (1 group)
*/
if (!this.inCharClass()) {
this.refactorCaptureGroup(!FOR_NAME);
}
} else if (this.group >= 10 && this.group <= 13) {
int startGroup = 10;
int groupCount = 4;
/*
* matches a back reference (by number)
* "\n" (form 0)
* "\gn" (form 1)
* "\g{n}" or "\g{-n}" (form 2)
*
* group: the number
* last group: the next character (if a digit)
* (4 groups)
*/
// form 0 ("\n") is the only form that is also handled inside a character class
if (!this.inCharClass() || this.group == startGroup) {
int form = this.group - startGroup;
int digitGroup = startGroup + groupCount - 1;
this.refactorBackreference(!FOR_NAME, form, digitGroup);
}
} else if (this.group >= 14 && this.group <= 29) {
int startGroup = 14;
int groupCount = 16;
/*
* matches a back reference (by name)
* "\g{name}" (form 0)
* "\k<name>" (form 1)
* "\k'name'" (form 2)
* "\k{name}" (form 3)
* "(?P=name)" (form 4)
*
* group: everything after the first symbol
* group + 1: the name
* group + 2: the occurrence (if specified)
* last group: the next character (if a digit)
* (16 groups)
*/
if (!this.inCharClass()) {
int form = (this.group - startGroup) / 3;
int digitGroup = startGroup + groupCount - 1;
this.refactorBackreference(FOR_NAME, form, digitGroup);
}
} else if (this.group == 30) {
/*
* matches an assert condition
* "(?(?=)", "(?(?!)", "(?(?<=)", or (presumably) "(?(?<!)"
*
* NOTE(review): the source text is truncated at this point. The rest of this
* comment, the body of the "group == 30" branch, and the opening of the branch
* for the named reference condition are missing; the only surviving fragment
* reads "= 32 && this.group <= 37) {". As the text stands, the
* reference-condition code below is attached to the "group == 30" test.
* Restore this section from the upstream source - TODO confirm.
*
* matches a named reference condition
* "(?(<name>)" (form 0),
* "(?('name')" (form 1), or
* "(?(name)" (form 2)
*
* group: the name
* group + 1: the occurrence (if specified)
* (6 groups)
*/
if (!this.inCharClass()) {
this.refactorConditionalPattern(FOR_NAME);
}
} else if (this.group == 38) {
/*
* matches a "branch reset" subpattern "(?|"
*
* group: everything
* (1 group)
*/
if (!this.inCharClass()) {
this.refactorBranchReset();
}
} else if (this.group == 39) {
/*
* matches an unbounded numeric range
* such as "(?Z[<1.234])"
*
* group: "Z" or "NZ"
* group + 1: comparison (such as "<")
* group + 2: value (such as "1.234")
* (3 groups)
*/
if (!this.inCharClass()) {
String mode = this.matcher.group(this.group);
String operator = this.matcher.group(this.group + 1);
Comparison comparison = Comparison.valueOf(operator.contains("<"), operator.contains("="));
// the value must be followed directly by "]" ...
int endRange = this.matcher.end(this.group + 2);
if (endRange >= this.text.length() || this.text.charAt(endRange) != ']') {
throw this.error(UNCLOSED_RANGE, endRange);
}
// ... and the match must end with the closing ")"
if (!this.match.endsWith(")")) {
// error is set at character after "]"
throw this.error(UNCLOSED_GROUP, endRange + 1);
}
this.replaceWith(nonCaptureGroup(PatternRange.unboundedRange(comparison,
this.matcher.group(this.group + 2), new RangeMode(mode))));
}
} else if (this.group == 42) {
/*
* matches a numeric range
* "(?Z[start..end])" or "(?NZ[start..end])"
*
* group: "Z" or "NZ" (optional base and L/U)
* //group + 1: "r" for raw mode, or
* group + 1: start
* group + 2: end
* (3 groups)
*/
if (!this.inCharClass()) {
this.refactorNumericRange();
}
} else {
this.refactorOthers();
}
}
// any parenthesis still open once the whole text has been scanned is an error
if (this.parenthesisDepth > 0) {
throw this.error(UNCLOSED_GROUP, this.text.length());
}
this.matcher.appendTail(this.result);
}
/**
* Steps taken after the regular expression is refactored
*/
private void afterRefactor() {
    this.text = this.result.toString();
    // The matcher reads the previous result buffer; reset() then installs a new,
    // empty buffer that receives the output of this step.
    this.matcher = RefactorUtility.afterRefactor.matcher(this.result);
    this.reset();

    while (this.matcher.find()) {
        if (this.loopSetup()) {
            continue;
        }

        // Dispatch on the first capture group that took part in the match.
        switch (this.group) {
        case 1:
            // Internal placeholder "\g{##type-mappingName}" where type is one of
            // "group", "branchGroup", "test" or "testF".
            //   group:     the number (mapped to the position used to show the error)
            //   group + 1: the type
            //   group + 2: the mapping name
            if (!this.inCharClass()) {
                final String placeholderType = this.matcher.group(this.group + 1);
                final String target = this.matcher.group(this.group + 2);
                switch (placeholderType) {
                case "group":
                    this.replaceWith(this.acceptGroup(target));
                    break;
                case "branchGroup":
                    this.replaceWith(this.acceptBranchReset(target));
                    break;
                case "test":
                    this.replaceWith(this.acceptTestingGroup(target));
                    break;
                case "testF":
                    this.replaceWith(this.failTestingGroup(target));
                    break;
                default:
                    // any other type is left untouched
                    break;
                }
            }
            break;
        case 4:
            // "(?onFlags-offFlags)" or "(?onFlags-offFlags:" (a plain non-capture
            // group when both flag parts are omitted).
            //   group:     onFlags (empty string if none)
            //   group + 1: offFlags (empty string if none; null, if omitted)
            if (!this.inCharClass()) {
                this.afterRefactorFlags();
            }
            break;
        case 6:
            // "\x{hhh..}" - a hex code; group: the number
            this.afterRefactorHexUnicode();
            break;
        case 7:
            // "\xh" or "\xhh" - a hex code; group: the number
            this.afterRefactorHexChar();
            break;
        case 8:
            // a unicode character; group: the number
            this.afterRefactorUnicode();
            break;
        case 9:
            // a POSIX character class
            //   group:     "^" or "" (whether to negate or not)
            //   group + 1: the class name
            this.afterRefactorPosixClass();
            break;
        case 11:
            // a control character, \cA through \cZ (equivalent to \x01 through \x1A);
            // group: the letter, upper or lower case
            this.afterRefactorControlCharacter();
            break;
        case 12:
            // an unnamed capture group "(" - not followed by a "?"
            if (!this.inCharClass()) {
                this.afterRefactorCaptureGroup();
            }
            break;
        default:
            this.afterRefactorOthers();
            break;
        }
    }

    this.matcher.appendTail(this.result);
}
/**
* Returns a regular expression that matches the capture group with the
* given mapping name.
*
* @param mappingName
* the mapping name for the group to accept
* @return a "raw" RegEx that matches the capture group with the given
* mapping name
*/
private String acceptGroup(final String mappingName) {
    // An "any group" name expands to an alternation over all of its occurrences.
    if (isAnyGroup(mappingName)) {
        return this.anyGroup(mappingName);
    }

    // Otherwise emit a numbered back reference to the mapped group, unless that
    // would be an invalid forward reference.
    final int targetIndex = this.pattern.getMappedIndex(mappingName);
    if (this.invalidForwardReference(targetIndex)) {
        throw this.invalidForwardReference();
    }
    return "\\" + targetIndex;
}
/**
* Returns a regular expression that matches any one of the groups
* with the given group name.
*
* @param mappingName
* group name with an occurrence of 0, an "any group"
* @return a regular expression that matches any one of the groups with
* the given group name
*/
private String anyGroup(final String mappingName) {
    // ex. [1][0] - an unnamed group is handled like a "branch reset" reference
    if (mappingName.charAt(0) == '[') {
        return this.acceptBranchReset(mappingName);
    }

    // group name without the trailing "[0]"
    final String baseName = anyGroupName(mappingName);
    final int occurrences = this.getGroupCount(baseName);

    final StringBuilder alternatives = new StringBuilder();
    // fail-tests for the testing groups of the occurrences already emitted;
    // this prefix is put in front of every later alternative
    final StringBuilder rejectEarlier = new StringBuilder();

    for (int occurrence = 1; occurrence <= occurrences; occurrence++) {
        final String occurrenceName = getMappingName(baseName, occurrence);
        final int testingGroup = this.getTestingGroup(occurrenceName);
        final int targetIndex = this.pattern.getMappedIndex(occurrenceName);

        // occurrences that would be invalid forward references are left out
        if (!this.invalidForwardReference(targetIndex)) {
            alternatives.append(rejectEarlier).append("\\").append(targetIndex).append('|');
            rejectEarlier.append(RefactorUtility.failTestingGroup(testingGroup));
        }
    }

    // every occurrence was a forward reference (or there were none)
    if (alternatives.length() == 0) {
        throw this.invalidForwardReference();
    }

    // drop the trailing '|'
    alternatives.setLength(alternatives.length() - 1);
    return alternatives.toString();
}
/**
* Returns a regular expression that matches any one of the groups
* in a "branch reset" subpattern with the given group name.
*
* @param mappingName
* group name with an occurrence of 0, an "any group"
* @return a regular expression that matches any one of the groups in a
* "branch reset" subpattern with the given group name
*/
private String acceptBranchReset(final String mappingName) {
    // group name without the trailing "[0]"
    final String baseName = anyGroupName(mappingName);
    final int occurrences = this.getGroupCount(baseName);

    // one numbered back reference per usable occurrence, separated by '|'
    final StringBuilder alternatives = new StringBuilder();
    for (int occurrence = 1; occurrence <= occurrences; occurrence++) {
        final int targetIndex = this.pattern.getMappedIndex(baseName, occurrence);

        // occurrences that would be invalid forward references are left out
        if (!this.invalidForwardReference(targetIndex)) {
            alternatives.append("\\").append(targetIndex).append('|');
        }
    }

    // every occurrence was a forward reference (or there were none)
    if (alternatives.length() == 0) {
        throw this.invalidForwardReference();
    }

    // drop the trailing '|'
    alternatives.setLength(alternatives.length() - 1);
    return alternatives.toString();
}
/**
* Returns a regular expression which will match the testing group(s)
* associated with the specified mapping name.
*/
private String acceptTestingGroup(final String mappingName) {
    final String condition;
    if (!isAnyGroup(mappingName)) {
        // a specific occurrence: back reference to its testing group
        final Integer testingGroup = this.getTestingGroup(mappingName);
        if (testingGroup == null) {
            throw this.error(INTERNAL_ERROR, -1);
        }
        if (this.invalidForwardReference(testingGroup)) {
            throw this.invalidForwardReference();
        }
        condition = "\\" + testingGroup;
    } else {
        // an "any group": alternation over the testing groups of all occurrences
        condition = this.anyCondition(mappingName);
    }

    // wrapped in a positive lookahead
    return "(?=" + condition + ")";
}
/**
* Returns a regular expression which fails if the testing group(s)
* associated with the specified mapping name matches.
*/
private String failTestingGroup(final String mappingName) {
    final String condition;
    if (isAnyGroup(mappingName)) {
        // an "any group": alternation over the testing groups of all occurrences
        condition = this.anyCondition(mappingName);
    } else {
        // a specific occurrence: back reference to its testing group
        final Integer testingGroup = this.getTestingGroup(mappingName);
        if (testingGroup == null) {
            // Same handling as acceptTestingGroup: a missing testing group is reported
            // as an internal error instead of failing with a NullPointerException
            // when the value is unboxed.
            throw this.error(INTERNAL_ERROR, -1);
        }
        if (this.invalidForwardReference(testingGroup)) {
            throw this.invalidForwardReference();
        }
        condition = "\\" + testingGroup;
    }

    // wrapped in a negative lookahead
    return "(?!" + condition + ")";
}
/**
 * Builds an alternation of back references, one per "testing group"
 * registered for the occurrences of the given "any group".
 *
 * <p>Occurrences whose testing group is an invalid forward reference are
 * left out of the alternation.</p>
 *
 * @param mappingName
 *            group name with an occurrence of 0, an "any group"
 * @return a RegEx that matches any one of the "testing groups"
 *         associated with the given mapping name
 * @throws PatternSyntaxException
 *             if no occurrence contributes a back reference
 */
private String anyCondition(final String mappingName) {
    // drop the trailing "[0]" to get the bare group name
    final String name = anyGroupName(mappingName);
    final int occurrences = this.pattern.getGroupCount(name);
    final StringBuilder alternatives = new StringBuilder();

    for (int occurrence = 1; occurrence <= occurrences; occurrence++) {
        final int testingGroup = this.getTestingGroup(getMappingName(name, occurrence));

        if (!this.invalidForwardReference(testingGroup)) {
            if (alternatives.length() != 0) {
                alternatives.append('|');
            }

            alternatives.append("\\").append(testingGroup);
        }
    }

    if (alternatives.length() == 0) {
        throw this.invalidForwardReference();
    }

    return alternatives.toString();
}
/**
 * Adds a mapping for the specified unnamed group.
 *
 * <p>Delegates to {@link #addGroup(String, int)} without any additional
 * processing.</p>
 *
 * @param mappingName
 * the mapping name
 * @param targetGroupIndex
 * the actual group number (in the internal pattern)
 */
private void addUnnamedGroup(final String mappingName, final int targetGroupIndex) {
this.addGroup(mappingName, targetGroupIndex);
}
/**
 * Adds a mapping for the specified named group.
 *
 * <p>Delegates to {@link #addGroup(String, int)} without any additional
 * processing.</p>
 *
 * @param mappingName
 * the mapping name
 * @param targetGroupIndex
 * the actual group number (in the internal pattern)
 */
private void addNamedGroup(final String mappingName, final int targetGroupIndex) {
this.addGroup(mappingName, targetGroupIndex);
}
/**
 * Adds a mapping from {@code mappingName} to {@code targetGroupIndex} to
 * the pattern's group mapping.
 *
 * <p>Nothing is added while in a subroutine.</p>
 *
 * @param mappingName
 *            the mapping name
 * @param targetGroupIndex
 *            the actual group number (in the internal pattern)
 */
private void addGroup(final String mappingName, final int targetGroupIndex) {
    if (this.inSubroutine()) {
        return;
    }

    this.pattern.getGroupMapping().put(mappingName, targetGroupIndex);
}
/**
 * Registers a testing group in {@link #testConditionGroups}.
 *
 * <p>Nothing is registered while in a subroutine.</p>
 *
 * @param mappingName
 *            the mapping name
 * @param targetGroupIndex
 *            the actual group number (in the internal pattern)
 */
private void addTestingGroup(final String mappingName, final int targetGroupIndex) {
    // TODO: verify this is correct
    if (this.inSubroutine()) {
        return;
    }

    this.testConditionGroups.put(mappingName, targetGroupIndex);
}
/**
 * Returns the mapping name of the group that a (possibly relative) numeric
 * group reference points to.
 *
 * <ul>
 * <li>An index with a leading {@code '+'} is relative: its number is added
 * to {@code groupCount}.</li>
 * <li>A negative index is relative: it is converted using
 * {@code getAbsoluteGroupIndex}.</li>
 * <li>Any other index is used as is.</li>
 * </ul>
 *
 * TODO: modify function name
 *
 * @param index
 *            the index
 * @param groupCount
 *            the group count
 * @return the mapping name (with occurrence 0) for the resolved group; the
 *         "never used" mapping name if a negative index cannot be resolved;
 *         or, for a negative index with {@code DOTNET_NUMBERING} set, the
 *         entry of the perl group mapping for the resolved group
 */
private String getAbsoluteGroup(final String index, final int groupCount) {
    if (index.charAt(0) == '+') {
        // forward relative reference, e.g. "+1"
        final int forwardIndex = parseInt(index.substring(1)) + groupCount;
        return getMappingName(forwardIndex, 0);
    }

    final int parsedIndex = parseInt(index);

    if (parsedIndex >= 0) {
        // already absolute
        return getMappingName(parsedIndex, 0);
    }

    // backward relative reference, e.g. "-1"
    final int absoluteIndex = getAbsoluteGroupIndex(parsedIndex, groupCount);

    if (absoluteIndex == -1) {
        return RefactorUtility.neverUsedMappingName();
    }

    if (this.has(DOTNET_NUMBERING)) {
        return this.getPerlGroup(absoluteIndex);
    }

    return getMappingName(absoluteIndex, 0);
}
/**
* Appends result with the specified (literal) string, and
* adds a new state to {@link #differences}.
*/
/*
* private StringBuffer appendWith(String str)
* {
* differences.insert(result.length(), str);
* result.append(str);
*
* return result;
* }
*/
/**
 * Normalizes the group name.
 *
 * <p>A name that is not wrapped in square brackets is returned unchanged.
 * A bracketed name is treated as a group number, which may be relative
 * (leading {@code '+'} or negative) to the current group; the resolved
 * number is returned wrapped again.</p>
 *
 * @param groupName
 *            the group name, or a group number wrapped in "[" and "]"
 * @return the normalized group name
 */
String normalizeGroupName(String groupName) {
    final boolean bracketed = groupName.startsWith("[") && groupName.endsWith("]");

    if (!bracketed) {
        return groupName;
    }

    // text between "[" and "]"
    final String inner = groupName.substring(1, groupName.length() - 1);

    if (inner.startsWith("+")) {
        // forward relative reference
        final int forwardIndex = parseInt(inner.substring(1)) + this.currentGroup;
        return wrapIndex(forwardIndex);
    }

    final int parsedIndex = parseInt(inner);

    if (parsedIndex >= 0) {
        return wrapIndex(parsedIndex);
    }

    // backward relative reference
    final int absoluteIndex = getAbsoluteGroupIndex(parsedIndex, this.currentGroup);

    if (absoluteIndex == -1) {
        return neverUsedMappingName();
    }

    if (!this.has(DOTNET_NUMBERING)) {
        return wrapIndex(absoluteIndex);
    }

    final String perlGroupName = this.getPerlGroup(absoluteIndex);

    if (perlGroupName.charAt(0) == '[') {
        return anyGroupName(perlGroupName);
    }

    // TODO: what should this case return?
    // the unwrapped relative index is returned (sounds incorrect)
    return inner;
}
/**
 * Returns the mapping name for the given occurrence of a named group,
 * converting the occurrence with {@code getAbsoluteGroupIndex} against the
 * number of occurrences counted so far for that name.
 *
 * @param groupName
 *            the group name
 * @param groupOccurrence
 *            the group occurrence
 * @return the mapping name, or the "never used" mapping name if resolving
 *         the occurrence throws an {@link IndexOutOfBoundsException}
 */
private String getAbsoluteNamedGroup(final String groupName, final String groupOccurrence) {
    try {
        final int requested = parseInt(groupOccurrence);
        final int absoluteOccurrence = getAbsoluteGroupIndex(requested, this.getGroupCount(groupName));

        return getMappingName(groupName, absoluteOccurrence);
    } catch (IndexOutOfBoundsException outOfRange) {
        return RefactorUtility.neverUsedMappingName();
    }
}
/**
* Appends result with the specified (literal) string, and
* adds a new state to {@link #differences}.
*/
/*
* private StringBuffer appendWith(String str)
* {
* differences.insert(result.length(), str);
* result.append(str);
*
* return result;
* }
*/
/**
 * Returns the mapping name for the given group name and occurrence.
 *
 * @param groupName
 *            the group name; an empty name means that
 *            {@code groupOccurrence} holds a group number
 * @param groupOccurrence
 *            the group occurrence, or {@code null} if none was given
 * @return the mapping name
 */
private String getAbsoluteGroup(final String groupName, final String groupOccurrence) {
    if (groupOccurrence == null) {
        // e.g. groupName
        return getMappingName(groupName, 0);
    }

    if (groupName.length() == 0) {
        // no name: the "occurrence" is a (possibly relative) group number
        return this.getAbsoluteGroup(groupOccurrence, this.currentGroup);
    }

    return this.getAbsoluteNamedGroup(groupName, groupOccurrence);
}
/**
 * Gets the displayed name for the group: the group name, followed by the
 * occurrence in square brackets when an occurrence is given.
 *
 * @param groupName
 *            the group name
 * @param groupOccurrence
 *            the group occurrence, or {@code null} if there is none
 * @return the displayed name for the group
 */
private String getDisplayName(final String groupName, final String groupOccurrence) {
    if (groupOccurrence == null) {
        return groupName;
    }

    return groupName + "[" + groupOccurrence + "]";
}
/**
 * Returns the mapping from perl group numbers to mapping names, creating
 * the map on first use.
 *
 * <p>The map is keyed by group number and holds mapping names; the return
 * type is parameterized accordingly so that callers get a {@code String}
 * back without casting.</p>
 *
 * @return the (never {@code null}) perl group mapping
 */
private HashMap<Integer, String> perlGroupMapping() {
    if (this.perlGroupMapping == null) {
        // created lazily
        this.perlGroupMapping = new HashMap<>();
    }

    return this.perlGroupMapping;
}
/**
 * Looks up the entry stored in the perl group mapping for the given group
 * index.
 *
 * @param groupIndex
 * the group index used as the key
 * @return the entry for the group index
 * (NOTE(review): presumably <code>null</code> if nothing was stored for
 * the index - callers do not check; confirm)
 */
private String getPerlGroup(final int groupIndex) {
return this.perlGroupMapping().get(groupIndex);
}
/**
 * Returns the group index for the testing group associated with the
 * given mapping name.
 *
 * @param mappingName
 * the mapping name
 * @return the value stored in {@link #testConditionGroups} for the
 * mapping name; several callers in this class check the result for
 * <code>null</code> before using it
 */
private Integer getTestingGroup(final String mappingName) {
return this.testConditionGroups.get(mappingName);
}
/**
 * Records an error position as an "in case of error" trace and returns the
 * key under which it was stored.
 *
 * <p>The key is the number of traces recorded before this call; the trace
 * map is created on first use.</p>
 *
 * @param errorIndex
 *            position to show error
 *
 * @return the integer key for the added mapping
 */
private Integer addErrorTrace(final int errorIndex) {
    if (this.errorTrace == null) {
        this.errorTrace = new HashMap<>(2);
    }

    final Integer traceKey = this.errorTrace.size();
    this.errorTrace.put(traceKey, errorIndex);

    return traceKey;
}
/**
 * Creates the exception reporting an invalid forward reference.
 *
 * <p>The text of the current match group is parsed as an error trace key;
 * the traced position is then translated with
 * {@code preRefactoringChanges.getOriginalIndex}. If that translation
 * rejects the position, {@code -1} is reported instead.</p>
 *
 * @return the exception (not thrown by this method)
 */
private PatternSyntaxException invalidForwardReference() {
    final Integer traceKey = Integer.valueOf(this.matcher.group(this.group));
    final int tracedIndex = this.errorTrace.get(traceKey);

    int reportedIndex;

    try {
        reportedIndex = this.preRefactoringChanges.getOriginalIndex(tracedIndex);
    } catch (IllegalArgumentException e) {
        reportedIndex = -1;
    }

    return new PatternSyntaxException(INVALID_FORWARD_REFERENCE, this.regex, reportedIndex);
}
/**
 * Indicates whether the given index is an invalid forward reference.
 *
 * <p>An index is invalid if either</p>
 * <ul>
 * <li>{@code isJava1_5} is set and the index is greater than
 * {@code totalGroups + 1}, or</li>
 * <li>the index is greater than {@code totalGroups} and at least 10.</li>
 * </ul>
 *
 * @param mappedIndex
 *            the index for the group in the internal {@code Matcher}
 * @return {@code true} if the given index is an invalid forward reference
 */
private boolean invalidForwardReference(final int mappedIndex) {
    final boolean beyondJava15Limit = this.isJava1_5 && mappedIndex > this.totalGroups + 1;
    final boolean multiDigitForwardReference = mappedIndex > this.totalGroups && mappedIndex >= 10;

    return beyondJava15Limit || multiDigitForwardReference;
}
/**
 * Gets the group counts.
 *
 * <p>The returned map is the internal one (not a copy); it is keyed by
 * group name and holds the number of occurrences counted for that name.</p>
 *
 * @return the group counts
 */
Map<String, Integer> getGroupCounts() {
    return this.groupCounts;
}
/**
 * Returns the group count for the given group name.
 *
 * @param groupName
 *            the group name
 * @return the count stored for the group name, or 0 if none is stored
 */
private int getGroupCount(final String groupName) {
    final Integer stored = this.groupCounts.get(groupName);

    if (stored == null) {
        return 0;
    }

    return stored;
}
/**
 * Sets the group count for the given group name.
 *
 * <p>Nothing is stored while in a subroutine.</p>
 *
 * @param groupName
 *            the group name
 * @param groupCount
 *            the group count
 */
private void setGroupCount(final String groupName, final int groupCount) {
    if (this.inSubroutine()) {
        return;
    }

    this.groupCounts.put(groupName, groupCount);
}
/**
 * Increases the group count for the given group name by one.
 *
 * <p>The increased count is handed to {@link #setGroupCount(String, int)}
 * and returned regardless of whether that method stores it.</p>
 *
 * @param groupName
 *            the group name
 *
 * @return the (new) group count for the given group name
 */
private int increaseGroupCount(final String groupName) {
    final int increased = 1 + this.getGroupCount(groupName);

    this.setGroupCount(groupName, increased);

    return increased;
}
/**
* Returns whether the current parenthesis depth matches the parenthesis
* depth of the top-most match state
*
* @param matchStates
* a stack of states
* @return whether the current parenthesis depth matches the parenthesis
* depth of the top-most match state
*/
private boolean atRightDepth(final Stack extends State> matchStates) {
if (matchStates.isEmpty()) {
return false;
}
return matchStates.peek().getParenthesisDepth() == this.parenthesisDepth;
}
/**
 * Steps to perform when entering a parenthesized construct: increases the
 * parenthesis depth by one and saves the current flags on the flags stack.
 *
 * @param duringPreRefactor
 *            whether this function call occurs during the pre-refactor
 *            step (not used by this method)
 */
private void increaseParenthesisDepth(final boolean duringPreRefactor) {
    this.parenthesisDepth += 1;

    // remember the flags in effect when the construct was opened
    this.flagsStack.push(this.flags);
}
/**
 * Increases the current group; outside of the pre-refactor step the total
 * number of groups is increased as well.
 *
 * @param duringPreRefactor
 *            whether this function call occurs during the pre-refactor
 *            step
 */
private void increaseCurrentGroup(final boolean duringPreRefactor) {
    this.currentGroup += 1;

    if (duringPreRefactor) {
        return;
    }

    this.totalGroups += 1;
}
/**
 * Registers the subpattern under the given mapping name.
 *
 * <p>Nothing is registered while in a subroutine.</p>
 *
 * @param mappingName
 *            the mapping name of the group
 * @param subpattern
 *            the subpattern to register
 */
private void addSubpattern(final String mappingName, final Subpattern subpattern) {
    if (this.inSubroutine()) {
        return;
    }

    this.getSubpatterns().put(mappingName, subpattern);
}
/**
 * Steps to perform when encountering a close parenthesis.
 *
 * <p>During the pre-refactor step (and outside of a subroutine), a
 * subpattern that was opened at the current depth is closed: its end is
 * set to the start of the current match, so the end parenthesis is not
 * included, and its pattern is set to the corresponding part of the
 * regex.</p>
 *
 * <p>In all cases the parenthesis depth is decreased by one and the flags
 * are restored from the flags stack.</p>
 *
 * @param duringPreRefactor
 *            whether this function call occurs during the pre-refactor
 *            step
 */
private void decreaseParenthesisDepth(final boolean duringPreRefactor) {
    Subpattern closing = null;

    if (duringPreRefactor && !this.inSubroutine()) {
        closing = this.openSubpatterns.get(this.parenthesisDepth);
    }

    if (closing != null) {
        this.openSubpatterns.remove(this.parenthesisDepth);

        // Don't include end parenthesis
        closing.setEnd(this.matcher.start());
        closing.setPattern(this.regex.substring(closing.getStart(), closing.getEnd()));

        /*
         * Subroutines
         *
         * 0) Run like a subroutine in code, must behave as if the regex jumped to that point in the pattern to
         * match; if that's not possible in Java, throw an error
         *
         * 1) Relative references should be made absolute
         */
    }

    this.parenthesisDepth -= 1;
    this.flags = this.flagsStack.pop();
}
/*
* The below functions contain the steps to refactor a specific part of
* the refactoring. The respective function is called during the
* different steps in the refactoring process and for each part in the
* refactoring of that step.
*/
/**
 * Refactors the subroutine during the refactoring step.
 *
 * <p>The subroutine call is replaced with an atomic group containing the
 * pattern of the referenced subpattern (obtained with the current flags).</p>
 *
 * @param isNamedGroup
 * whether the subroutine is a named group
 * @throws PatternSyntaxException
 * if the reference is to group 0, if the referenced group has no mapped
 * index, if no subpattern is registered for it, or if inserting the
 * subpattern fails (in which case the error names the subroutine)
 */
private void refactorSubroutine(final boolean isNamedGroup) {
// position reported for every error raised below
int errorPosition = this.matcher.start(this.group);
String displayName;
String mappingName;
// TODO: Test whether group occurrences are supported
// TODO: Test whether relative group occurrences are supported within subpatterns
if (isNamedGroup) {
String groupName = this.normalizeGroupName(this.matcher.group(this.group));
String groupOccurrence = this.matcher.group(this.group + 1);
// an omitted occurrence is treated as occurrence 0
int groupOccurenceInt = groupOccurrence == null ? 0 : Integer.parseInt(groupOccurrence);
// System.out.println("Group name: " + groupName);
// System.out.println("Group occurrence: " + groupOccurrence);
// the display name uses the text as written (not the normalized name)
displayName = this.getDisplayName(this.matcher.group(this.group), this.matcher.group(this.group + 1));
// mappingName = getAbsoluteGroup(groupName, groupOccurrence);
mappingName = getMappingName(groupName, groupOccurenceInt);
} else {
displayName = this.getDisplayName(this.matcher.group(this.group), null);
mappingName = this.getAbsoluteGroup(this.matcher.group(this.group), this.currentGroup);
}
// a mapping name starting with "[0]" is reported as a reference to group 0
if (mappingName.startsWith("[0]")) {
throw this.error(ZERO_REFERENCE, errorPosition);
}
// TODO: mimics PCRE, by using first occurrence
if (isAnyGroup(mappingName)) {
mappingName = anyGroupName(mappingName) + "[1]";
}
// System.out.println("Subroutine: " + mappingName);
// System.out.println("Mapped index: " + pattern.getMappedIndex(mappingName));
// System.out.println("Pattern: " + pattern);
// System.out.println("Group mapping: " + pattern.getGroupMapping());
if (this.pattern.getMappedIndex(mappingName) == null) {
throw this.error(NONEXISTENT_SUBPATTERN, errorPosition);
}
@SuppressWarnings("hiding")
Subpattern subpattern = this.getSubpatterns().get(mappingName);
if (subpattern == null) {
throw this.error(INVALID_SUBROUTINE, errorPosition);
}
// System.out.println("Subpattern pattern: " + subpattern.getPattern());
// if (!subpattern.addSubpatternDependency(mappingName))
// throw error(CIRCULAR_SUBROUTINE, matcher.start(group));
subpattern.addSubpatternDependency(mappingName);
try {
// Uses an atomic group, same as PCRE
this.replaceWith("(?>" + subpattern.getPattern(this.flags) + ")");
} catch (PatternSyntaxException e) {
// TODO: use mapping name, if contains relative reference
// rethrow at the subroutine call's position, naming the subroutine and
// keeping the description, message and stack trace of the original error
PatternSyntaxException error = this.error(e.getDescription(), errorPosition,
"Subroutine: \"" + displayName + "\"" + newLine + e.getMessage());
error.setStackTrace(e.getStackTrace());
throw error;
}
}
/**
 * Updates the {@link #flags} variable for an inline flag change.
 *
 * <p>Every recognized flag character in {@code onFlags} sets the
 * corresponding flag; afterwards every recognized flag character in
 * {@code offFlags} clears it. Unrecognized characters are ignored.</p>
 *
 * @param onFlags
 *            the flags that were turned on
 * @param offFlags
 *            the flags that were turned off, or {@code null} if none were
 *            specified
 */
private void setFlags(final String onFlags, final String offFlags) {
    if (onFlags.indexOf('x') >= 0) {
        this.flags |= COMMENTS;
    }

    if (onFlags.indexOf('d') >= 0) {
        this.flags |= UNIX_LINES;
    }

    if (onFlags.indexOf('o') >= 0) {
        this.flags |= PERL_OCTAL;
    }

    if (onFlags.indexOf('v') >= 0) {
        this.flags |= VERIFY_GROUPS;
    }

    // The remaining flags are tracked as well, so that all inline flags
    // are known (required for subroutines)
    if (onFlags.indexOf('i') >= 0) {
        this.flags |= CASE_INSENSITIVE;
    }

    if (onFlags.indexOf('s') >= 0) {
        this.flags |= DOTALL;
    }

    if (onFlags.indexOf('J') >= 0) {
        this.flags |= DUPLICATE_NAMES;
    }

    if (onFlags.indexOf('n') >= 0) {
        this.flags |= EXPLICIT_CAPTURE;
    }

    if (onFlags.indexOf('m') >= 0) {
        this.flags |= MULTILINE;
    }

    if (onFlags.indexOf('u') >= 0) {
        this.flags |= UNICODE_CASE;
    }

    if (onFlags.indexOf('U') >= 0) {
        this.flags |= UNICODE_CHARACTER_CLASS;
    }

    if (offFlags == null) {
        // nothing is turned off
        return;
    }

    if (offFlags.indexOf('x') >= 0) {
        this.flags &= ~COMMENTS;
    }

    if (offFlags.indexOf('d') >= 0) {
        this.flags &= ~UNIX_LINES;
    }

    if (offFlags.indexOf('o') >= 0) {
        this.flags &= ~PERL_OCTAL;
    }

    if (offFlags.indexOf('v') >= 0) {
        this.flags &= ~VERIFY_GROUPS;
    }

    // The remaining flags are tracked as well (required for subroutines)
    if (offFlags.indexOf('i') >= 0) {
        this.flags &= ~CASE_INSENSITIVE;
    }

    if (offFlags.indexOf('s') >= 0) {
        this.flags &= ~DOTALL;
    }

    if (offFlags.indexOf('J') >= 0) {
        this.flags &= ~DUPLICATE_NAMES;
    }

    if (offFlags.indexOf('n') >= 0) {
        this.flags &= ~EXPLICIT_CAPTURE;
    }

    if (offFlags.indexOf('m') >= 0) {
        this.flags &= ~MULTILINE;
    }

    if (offFlags.indexOf('u') >= 0) {
        this.flags &= ~UNICODE_CASE;
    }

    if (offFlags.indexOf('U') >= 0) {
        this.flags &= ~UNICODE_CHARACTER_CLASS;
    }
}
/**
 * Strips the "J" and "n" flags from the given inline flags, applying them
 * to {@link #flags} instead, and returns the remaining flag text.
 *
 * @param onFlags
 *            the flags that were turned on
 * @param offFlags
 *            the flags that were turned off, or {@code null} if none were
 *            specified
 * @return the remaining flags in the form {@code on} or {@code on-off}
 *         (the "-off" part is omitted if no off flags remain), or
 *         {@code null} if neither "J" nor "n" occurred
 */
private String replaceFlags(String onFlags, String offFlags) {
    final StringBuilder remaining = new StringBuilder(this.matcher.end() - this.matcher.start());

    final boolean enablesDuplicateNames = onFlags.contains("J");
    final boolean enablesExplicitCapture = onFlags.contains("n");

    if (enablesDuplicateNames) {
        this.flags |= DUPLICATE_NAMES;
    }

    if (enablesExplicitCapture) {
        this.flags |= EXPLICIT_CAPTURE;
    }

    boolean stripped = enablesDuplicateNames || enablesExplicitCapture;
    remaining.append(onFlags.replace("J", "").replace("n", ""));

    if (offFlags != null) {
        final boolean disablesDuplicateNames = offFlags.contains("J");
        final boolean disablesExplicitCapture = offFlags.contains("n");

        if (disablesDuplicateNames) {
            this.flags &= ~DUPLICATE_NAMES;
        }

        if (disablesExplicitCapture) {
            this.flags &= ~EXPLICIT_CAPTURE;
        }

        stripped = stripped || disablesDuplicateNames || disablesExplicitCapture;

        final String remainingOff = offFlags.replace("J", "").replace("n", "");

        if (!remainingOff.isEmpty()) {
            remaining.append('-').append(remainingOff);
        }
    }

    if (!stripped) {
        return null;
    }

    return remaining.toString();
}
/**
 * Refactors the flags during the pre-refactoring step.
 *
 * <p>Handles a match of "(?onFlags-offFlags)" or "(?onFlags-offFlags:"
 * (which also covers a non-capture group, where the flags are omitted).
 * The match groups are:</p>
 * <ul>
 * <li>group: onFlags (empty string if none)</li>
 * <li>group + 1: offFlags (empty string if none; null, if omitted)</li>
 * </ul>
 *
 * <p>The flags are applied to the current state, and the match is
 * rewritten if {@link #replaceFlags(String, String)} stripped any flag.</p>
 */
private void preRefactorFlags() {
    final String enabled = this.matcher.group(this.group);
    final String disabled = this.matcher.group(this.group + 1);
    final char terminator = this.match.charAt(this.match.length() - 1);

    // a match that does not end in ")" opens a group
    final boolean opensGroup = terminator != ')';

    if (opensGroup) {
        this.increaseParenthesisDepth(DURING_PREREFACTOR);
    }

    this.setFlags(enabled, disabled);

    final String remaining = this.replaceFlags(enabled, disabled);

    if (remaining == null) {
        // no change
        return;
    }

    if (remaining.length() == 0 && !opensGroup) {
        // a flag-only construct with no flags left is removed entirely
        this.replaceWith("");
    } else {
        this.replaceWith("(?" + remaining + terminator);
    }
}
/**
 * Refactors the flags during the refactoring step.
 *
 * <p>Handles a match of "(?onFlags-offFlags)" or "(?onFlags-offFlags:"
 * (which also covers a non-capture group, where the flags are omitted).
 * The match groups are:</p>
 * <ul>
 * <li>group: onFlags (empty string if none)</li>
 * <li>group + 1: offFlags (empty string if none; null, if omitted)</li>
 * </ul>
 *
 * <p>Only the state is updated; the matched text is left as is.</p>
 */
private void refactorFlags() {
    final String enabled = this.matcher.group(this.group);
    final String disabled = this.matcher.group(this.group + 1);

    // a match that does not end in ")" opens a group
    final boolean opensGroup = this.match.charAt(this.match.length() - 1) != ')';

    if (opensGroup) {
        this.increaseParenthesisDepth(!DURING_PREREFACTOR);
    }

    this.setFlags(enabled, disabled);
}
/**
 * Refactors the flags (after the refactoring step).
 *
 * <p>The flags are applied to the current state, and the "o" and "v" flags
 * are stripped from the matched text. If neither occurred, the match is
 * left unchanged.</p>
 */
private void afterRefactorFlags() {
    final String enabled = this.matcher.group(this.group);
    final String disabled = this.matcher.group(this.group + 1);
    final char terminator = this.match.charAt(this.match.length() - 1);

    // a match that does not end in ")" opens a group
    final boolean opensGroup = terminator != ')';

    if (opensGroup) {
        this.increaseParenthesisDepth(!DURING_PREREFACTOR);
    }

    this.setFlags(enabled, disabled);

    // Ensure that all illegal flags are removed
    final StringBuilder remaining = new StringBuilder(this.matcher.end() - this.matcher.start());

    final boolean enablesPerlOctal = enabled.contains("o");
    final boolean enablesVerifyGroups = enabled.contains("v");

    if (enablesPerlOctal) {
        this.flags |= PERL_OCTAL;
    }

    if (enablesVerifyGroups) {
        this.flags |= VERIFY_GROUPS;
    }

    boolean stripped = enablesPerlOctal || enablesVerifyGroups;
    remaining.append(enabled.replace("o", "").replace("v", ""));

    if (disabled != null) {
        final boolean disablesPerlOctal = disabled.contains("o");
        final boolean disablesVerifyGroups = disabled.contains("v");

        if (disablesPerlOctal) {
            this.flags &= ~PERL_OCTAL;
        }

        if (disablesVerifyGroups) {
            this.flags &= ~VERIFY_GROUPS;
        }

        stripped = stripped || disablesPerlOctal || disablesVerifyGroups;

        final String remainingOff = disabled.replace("o", "").replace("v", "");

        if (!remainingOff.isEmpty()) {
            remaining.append('-').append(remainingOff);
        }
    }

    if (!stripped) {
        // no change
        return;
    }

    if (remaining.length() == 0 && !opensGroup) {
        // a flag-only construct with no flags left is removed entirely
        this.replaceWith("");
    } else {
        this.replaceWith("(?" + remaining + terminator);
    }
}
/**
 * Refactors a capturing group during the pre-refactoring step
 *
 * <p>Registers the group (counts, group mappings with a not yet known
 * target, and the subpattern opened at the current parenthesis depth) so
 * that later steps can refer to it. An unnamed group is turned into a
 * non-capture group when <code>EXPLICIT_CAPTURE</code> is set.</p>
 *
 * @param isNamedGroup
 * whether the capture group is a named group
 * @param form
 * the capture group's 0-based form
 * @throws PatternSyntaxException
 * for a named group: if the name is missing, if the terminator is
 * missing, or if the name was already used and
 * <code>DUPLICATE_NAMES</code> is not set
 */
private void preRefactorCaptureGroup(final boolean isNamedGroup, final int form) {
// TODO: verify this works fully
// if (inSubroutine())
// throw error("Subroutines do not support capture groups", matcher.start(group));
this.increaseParenthesisDepth(DURING_PREREFACTOR);
// with EXPLICIT_CAPTURE only named groups capture
if (!isNamedGroup && this.has(EXPLICIT_CAPTURE)) {
this.replaceWith(startNonCaptureGroup());
return;
}
// nothing is registered while in a subroutine
if (this.inSubroutine()) {
return;
}
this.increaseCurrentGroup(DURING_PREREFACTOR);
Subpattern subpattern;
// NOTE(review): inSubroutine() already caused a return above, so the "else"
// below looks unreachable unless increaseCurrentGroup changes that state - confirm
if (!this.inSubroutine()) {
// Initialize subpattern
subpattern = new Subpattern(this);
// the subpattern starts after the captured text (the group name / symbols)
subpattern.setStart(this.matcher.end(this.group));
// System.out.println("Total groups: " + totalGroups);
// TODO: include flags and prepend subpattern with them
subpattern.setFlags(this.flags);
// flags are maintained
// closed again in decreaseParenthesisDepth, which looks it up by depth
this.openSubpatterns.put(this.parenthesisDepth, subpattern);
} else {
subpattern = null;
}
if (isNamedGroup) {
/*
 * matches a named capture group
 * "(?<name>" (form 0)
 * "(?'name'" (form 1)
 * "(?P<name>" (form 2)
 *
 * group: everything after first symbol
 * group + 1: the name
 * (6 groups)
 */
this.subpatternNameExpected();
// terminators for forms 0, 1 and 2, in that order
this.checkForMissingTerminator(">'>", form);
String groupName = this.matcher.group(this.group + 1);
int occurrence = this.increaseGroupCount(groupName);
if (occurrence != 1 && !this.has(DUPLICATE_NAMES)) {
throw this.error(DUPLICATE_NAME, this.matcher.start(this.group));
}
String mappingName = getMappingName(groupName, occurrence);
this.addNamedGroup(mappingName, TARGET_UNKNOWN);
this.addSubpattern(mappingName, subpattern);
// inside a branch reset, a named group is counted as an unnamed group
if (!this.inBranchReset()) {
this.namedGroup++;
} else {
this.unnamedGroup++;
}
// TODO: verify this is correct
if (!this.inSubroutine() && this.has(DOTNET_NUMBERING)) {
if (!this.inBranchReset()) {
this.perlGroupMapping().put(this.currentGroup, mappingName);
} else {
this.perlGroupMapping().put(this.currentGroup, getMappingName(this.currentGroup, 0));
}
}
} else {
this.unnamedGroup++;
// TODO: verify this is correct
if (!this.inSubroutine() && this.has(DOTNET_NUMBERING)) {
String mappingName = getMappingName(this.unnamedGroup, 0);
this.perlGroupMapping().put(this.currentGroup, mappingName);
}
}
// A numeric mapping is added unless the group is a named group outside of a
// branch reset while DOTNET_NUMBERING is set (see the TODO below)
if (!this.has(DOTNET_NUMBERING) || this.inBranchReset() || !isNamedGroup) {
int groupIndex = this.has(DOTNET_NUMBERING) ? this.unnamedGroup : this.currentGroup;
String groupName = wrapIndex(groupIndex);
int occurrence = this.increaseGroupCount(groupName);
String mappingName = getMappingName(groupIndex, occurrence);
// add mapping for group index
this.addUnnamedGroup(mappingName, TARGET_UNKNOWN);
this.addSubpattern(mappingName, subpattern);
}
// TODO: what is named group in branch reset pattern??
/*
 * TODO: how to add numeric subpattern for case of
 * 1) has(DOTNET_NUMBERING)
 * 2) !inBranchReset()
 * 3) isNamedGroup
 */
}
/**
 * Checks that the necessary terminating character is present.
 *
 * <p>{@code endings} is a string where each character is an ending. The
 * first character (index 0) refers to the ending for form 0, the second
 * character (index 1) for form 1, etc.</p>
 *
 * <p>Forms that have no terminating character must occur after the forms
 * that do. For example, if form 3 has no ending, {@code endings} would be
 * of length 3, and characters 0 - 2 would have the endings for forms
 * 0 - 2.</p>
 *
 * <p>In this case, if {@code form} was equal to three, this method returns
 * successfully since there is no missing terminator - there is no
 * terminator at all, so there is no way that it is missing.</p>
 *
 * @param endings
 *            the endings to test
 * @param form
 *            the 0-based form
 *
 * @throws PatternSyntaxException
 *             If the necessary terminator is missing
 */
private void checkForMissingTerminator(final String endings, final int form) {
    if (form >= endings.length()) {
        // this form has no terminator
        return;
    }

    final String terminator = endings.substring(form, form + 1);

    if (!this.matcher.group(this.group).endsWith(terminator)) {
        throw this.error(MISSING_TERMINATOR, this.matcher.end(this.group));
    }
}
/**
 * Checks that a subpattern name is present.
 *
 * <p>The name is considered missing if the name group (group + 1) is
 * blank and the occurrence group (group + 2) did not participate in the
 * match.</p>
 *
 * @throws PatternSyntaxException
 *             If the subpattern name is missing
 */
private void subpatternNameExpected() {
    final int nameGroup = this.group + 1;

    if (this.matcher.start(nameGroup) != this.matcher.end(nameGroup)) {
        // name is not blank
        return;
    }

    if (this.matcher.start(this.group + 2) != -1) {
        // an [occurrence] was given
        return;
    }

    throw this.error(SUBPATTERN_NAME_EXPECTED, this.matcher.start(this.group));
}
/**
 * Refactors a capturing group during the refactoring step
 *
 * <p>Maps the group's name (if any) and its number to the actual group
 * number (<code>totalGroups</code>). If the group is used in a condition,
 * it is additionally wrapped so that an empty testing group can be added
 * at the end of the capture group.</p>
 *
 * @param isNamedGroup
 * whether the group is a named group
 */
private void refactorCaptureGroup(final boolean isNamedGroup) {
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
this.increaseCurrentGroup(!DURING_PREREFACTOR);
// inside a branch reset, a named group is counted as an unnamed group
if (isNamedGroup && !this.inBranchReset()) {
this.namedGroup++;
} else {
this.unnamedGroup++;
}
boolean usedInCondition;
String namedMappingName;
if (isNamedGroup) {
String groupName = this.matcher.group(this.group);
int occurrence = this.increaseGroupCount(groupName);
namedMappingName = getMappingName(groupName, occurrence);
// add mapping for group name
this.addNamedGroup(namedMappingName, this.totalGroups);
// a condition may refer to this occurrence, or to any occurrence (occurrence 0)
usedInCondition = this.usedInCondition(namedMappingName)
|| this.usedInCondition(getMappingName(groupName, 0));
} else {
usedInCondition = false;
namedMappingName = null;
}
int groupIndex = this.getCurrentGroup(isNamedGroup && !this.inBranchReset());
String groupName = wrapIndex(groupIndex);
int occurrence = this.increaseGroupCount(groupName);
String mappingName = getMappingName(groupName, occurrence);
// add mapping for group index
this.addUnnamedGroup(mappingName, this.totalGroups);
// TODO: uncomment to use for debugging
// System.out.printf("%s (%s): %s%n", mappingName, totalGroups, getSubpatterns().get(mappingName));
if (!usedInCondition) {
// not used by name - check whether it is used by number
usedInCondition = this.usedInCondition(mappingName) || this.usedInCondition(getMappingName(groupName, 0));
if (!usedInCondition) {
// no testing group required
if (isNamedGroup) {
// remove name part
this.replaceWith("(");
}
return;
}
}
// remove name part (if applicable) and convert form
// "(?<name>RegEx)" -> "(?:(RegEx)())"
this.replaceWith(startNonCaptureGroup() + "(");
// second increase: accounts for the additional (wrapping) group opened above
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
// add a MatchState to track where to add
// the necessary "()" at the end of the capture group
this.addTestGroup.push(new AddTestGroupState(mappingName, this.parenthesisDepth, namedMappingName));
}
/**
 * Steps to perform for a capturing group after the refactoring step:
 * increases the parenthesis depth and the current group (neither call is
 * flagged as occurring during the pre-refactor step).
 */
private void afterRefactorCaptureGroup() {
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
this.increaseCurrentGroup(!DURING_PREREFACTOR);
}
/**
 * Returns the number of the current group under the numbering in effect.
 *
 * <p>Without {@code DOTNET_NUMBERING} this is the current group. With
 * {@code DOTNET_NUMBERING}, a named group is numbered after all unnamed
 * groups ({@code unnamedGroupCount} plus the named group counter), whereas
 * an unnamed group uses the unnamed group counter.</p>
 *
 * @param isNamedGroup
 *            whether the group is numbered as a named group
 * @return the group number
 */
private int getCurrentGroup(final boolean isNamedGroup) {
    if (!this.has(DOTNET_NUMBERING)) {
        return this.currentGroup;
    }

    if (!isNamedGroup) {
        return this.unnamedGroup;
    }

    return this.unnamedGroupCount + this.namedGroup;
}
/**
 * Returns whether the specified group is used as a condition, that is,
 * whether its mapping name was recorded in <code>requiresTestingGroup</code>.
 *
 * @param mappingName
 * the mapping name for the group
 *
 * @return <code>true</code> if the specified group is used as a
 * condition; <code>false</code> otherwise.
 */
private boolean usedInCondition(final String mappingName) {
return this.requiresTestingGroup.contains(mappingName);
}
/**
 * Returns the unwrapped group index.
 *
 * <p>If {@code groupIndex} is surrounded by square brackets, they are
 * removed, and the group index is returned. Otherwise, the given group
 * index is returned unmodified. In particular, the empty string is
 * returned unmodified instead of causing a
 * {@link StringIndexOutOfBoundsException}.</p>
 *
 * @param groupIndex
 *            the group index
 * @return the unwrapped index
 */
static String unwrapIndex(final String groupIndex) {
    final int length = groupIndex.length();

    // a wrapped index consists of at least the two brackets
    if (length >= 2 && groupIndex.charAt(0) == '[' && groupIndex.charAt(length - 1) == ']') {
        return groupIndex.substring(1, length - 1);
    }

    return groupIndex;
}
/**
 * Refactors a conditional pattern during the pre-refactoring step
 *
 * <p>Validates the condition's syntax, resolves the group it refers to,
 * and records that this group requires a testing group. The matched text
 * itself is not modified by this method.</p>
 *
 * @param isNamedGroup
 * whether the conditional is a name or number
 * @param form
 * the 0-based form for the conditional
 * @throws PatternSyntaxException
 * if used in a subroutine, or - for a named condition - if nothing
 * follows "(?(", the terminator is missing, or the condition is not
 * closed with ")"
 */
private void preRefactorConditionalPattern(final boolean isNamedGroup, final int form) {
if (this.inSubroutine()) {
throw this.error("Subroutines do not support named/unnamed conditionals", this.matcher.start(this.group));
}
this.increaseParenthesisDepth(DURING_PREREFACTOR);
String mappingName;
if (isNamedGroup) {
/*
 * matches a named reference condition
 * "(?(<name>)" (form 0),
 * "(?('name')" (form 1), or
 * "(?(name)" (form 2)
 *
 * group: everything after first symbol (excluding ")")
 * group + 1: the name
 * (6 groups)
 */
// if nothing after "(?("
if (this.matcher.start(this.group) == this.matcher.end(this.group)) {
throw this.error(ASSERTION_EXPECTED, this.matcher.end(this.group));
}
// terminators for forms 0 and 1 (form 2 has none)
this.checkForMissingTerminator(">'", form);
if (!this.match.endsWith(")")) {
throw this.error(UNCLOSED_GROUP, this.matcher.end(this.group));
}
String groupName = this.matcher.group(this.group + 1);
// a "name" in square brackets is a (possibly relative) group number
if (groupName.charAt(0) == '[') {
String tmpMappingName = this.getAbsoluteGroup(unwrapIndex(groupName), this.currentGroup);
if (isAnyGroup(tmpMappingName)) {
// reference is an unnamed group
// (any occurrence is possible)
groupName = anyGroupName(tmpMappingName);
String groupOccurrence = this.matcher.group(this.group + 2);
mappingName = this.getAbsoluteGroup(groupName, groupOccurrence);
} else {
// reference is a named group
// (occurrence is ignored)
mappingName = tmpMappingName;
}
} else {
// named group
String groupOccurrence = this.matcher.group(this.group + 2);
mappingName = this.getAbsoluteGroup(groupName, groupOccurrence);
}
} else {
mappingName = this.getAbsoluteGroup(this.matcher.group(this.group), this.currentGroup);
}
// checked via usedInCondition when the referenced capture group is refactored
this.requiresTestingGroup.add(mappingName);
// add a MatchState to handle the "else" branch
this.handleElseBranch.push(new MatchState("", this.parenthesisDepth, -1));
}
/**
 * Refactors a conditional pattern during the refactoring step
 *
 * <p>The start of the conditional is replaced with a non-capture group
 * followed by the test for the condition:</p>
 * <ul>
 * <li>a test of the testing group(s), if the referenced group(s) have
 * already occurred;</li>
 * <li>a <code>\g{...test-mappingName}</code> placeholder (together with an
 * error trace for the condition's position), if the referenced group(s)
 * occur later on;</li>
 * <li>an always-failing test, for the special <code>DEFINE</code>
 * condition.</li>
 * </ul>
 *
 * @param isNamedGroup
 * whether the condition is a name or number
 * @throws PatternSyntaxException
 * if the condition refers to group 0 or to a group that does not exist
 */
private void refactorConditionalPattern(final boolean isNamedGroup) {
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
// a number is wrapped in square brackets, so that it is normalized as a group number
String groupName = this.normalizeGroupName(
isNamedGroup ? this.matcher.group(this.group) : "[" + this.matcher.group(this.group) + "]");
// start of groupName / number
int start = this.matcher.start(this.group);
if (groupName.equals("[0]")) {
throw this.error(INVALID_CONDITION0, start);
}
String groupOccurrence = isNamedGroup ? this.matcher.group(this.group + 1) : null;
String mappingName = this.getAbsoluteGroup(groupName, groupOccurrence);
Integer mappingIndexI = this.pattern.getMappedIndex(mappingName);
Integer testConditionGroupI = this.getTestingGroup(mappingName);
// System.out.println("refactorConditionalPattern: " + mappingName);
if (isAnyGroup(mappingName)) {
// int groupCount = groupCount(groupName);
// int groupCount = getGroupCount(groupName);
int groupCount = this.pattern.getGroupCount(groupName);
// TODO: verify this change is valid
// modified to add support for +x on conditionals (can be used in subroutines)
if (groupCount == 0) {
if (groupName.equals("DEFINE")) {
// (?(DEFINE)...) is a special condition, which should always be false
// (allows defining subpatterns, without them being matched at that point)
// TODO: is it possible to completely remove DEFINE group from internal pattern ??
// TODO: ensure DEFINE group has no alternations - otherwise, throw error
this.replaceWith(startNonCaptureGroup() + fail());
} else {
// if (has(VERIFY_GROUPS))
throw this.error(NONEXISTENT_SUBPATTERN, start);
}
// the specified group doesn't exist
// replaceWith(startNonCaptureGroup() + fail());
} else if (!this.allDone(groupName, groupCount)) {
// System.out.println("Some groups occur later on: " + mappingName);
// some groups occur later on
this.replaceWith(
startNonCaptureGroup() + "\\g{" + this.addErrorTrace(start) + "test-" + mappingName + "}");
testConditionGroupI = TARGET_UNKNOWN;
} else {
// System.out.println("All groups have already occurred: " + mappingName);
// all groups have already occurred
this.replaceWith(startNonCaptureGroup() + this.acceptTestingGroup(mappingName));
// TODO: need to rename condition group
// testConditionGroupI = BRANCH_RESET;
testConditionGroupI = 0;
}
} else if (mappingIndexI == null) {
// if (has(VERIFY_GROUPS))
throw this.error(NONEXISTENT_SUBPATTERN, start);
// the specified group doesn't exist
// replaceWith(startNonCaptureGroup() + fail());
} else if (testConditionGroupI == null) {
// the specified group exists, but occurs later
this.replaceWith(startNonCaptureGroup() + "\\g{" + this.addErrorTrace(start) + "test-" + mappingName + "}");
testConditionGroupI = TARGET_UNKNOWN;
} else {
// the specified group has already occurred
this.replaceWith(startNonCaptureGroup() + RefactorUtility.acceptTestingGroup(testConditionGroupI));
}
// NOTE(review): for the DEFINE case testConditionGroupI keeps the value returned by
// getTestingGroup (presumably null), which is unboxed if MatchState takes an int - confirm
// add a MatchState to handle the "else" branch
this.handleElseBranch.push(new MatchState(mappingName, this.parenthesisDepth, testConditionGroupI));
}
/**
* Refactors a back reference during the pre-refactoring step
*/
private void preRefactorBackreference() {
    final String referencedName = this.matcher.group(this.group);

    if (RefactorUtility.isUnnamedGroup(referencedName)) {
        // references to unnamed groups are not recorded
        return;
    }

    // remember every other referenced group name
    this.anyGroupReferences.add(referencedName);
}
/**
* Refactors a back reference during the refactoring step
*
* @param isNamedGroup
* whether the back reference is by name or number
* @param form
* the form for the back reference
* @param digitGroup
* the index for the group which stores the digit (if any)
* that follows the back reference
*/
private void refactorBackreference(final boolean isNamedGroup, final int form, final int digitGroup) {
String mappingName;
// digits that follow the reference but are not part of it
String trailingDigits = "";
// start of groupName / number
int start;
if (isNamedGroup) {
/*
* matches a back reference (by name)
* "\g{name}" (form 0)
* "\k<name>" (form 1)
* "\k'name'" (form 2)
* "\k{name}" (form 3)
* "(?P=name)" (form 4)
*
* group: everything after first symbol
* group + 1: the name
* group + 2: the occurrence (may be absent)
* (10 groups)
*/
this.subpatternNameExpected();
// one expected terminator character per form, indexed by form
this.checkForMissingTerminator("}>'})", form);
start = this.matcher.start(this.group + 1);
String groupName = this.normalizeGroupName(this.matcher.group(this.group + 1));
String groupOccurrence = this.matcher.group(this.group + 2);
mappingName = this.getAbsoluteGroup(groupName, groupOccurrence);
} else {
/*
* matches a back reference (by number)
* "\n" (form 0)
* "\gn" (form 1)
* "\g{n}" or "\g{-n}" (form 2)
*
* group: the number
* (3 groups)
*/
int groupIndex;
start = this.matcher.start(this.group);
if (form == 0) {
// "\n" may actually be an octal escape; getBackreference replaces it and returns null then
java.util.regex.MatchResult backreference = this.getBackreference(this.matcher.group(this.group),
start);
if (backreference == null) {
// not a back reference (i.e. an octal code)
// (handled in above function call)
return;
}
groupIndex = Integer.parseInt(backreference.group(1));
// TODO: verify functionality with DOTNET_NUMBERING
// Uses if, not while, because getBackreference method checks backreference
// uses smallest digit for # of digits in current group
if (groupIndex > this.currentGroup && groupIndex >= 10) {
// the last digit is literal text following the reference
trailingDigits = String.valueOf(groupIndex % 10);
groupIndex /= 10;
}
trailingDigits += backreference.group(2);
String groupName = wrapIndex(groupIndex);
mappingName = getMappingName(groupName, 0);
} else {
mappingName = this.getAbsoluteGroup(this.matcher.group(this.group), this.currentGroup);
}
}
// Checked after octal check
// TODO: verify this works fully
// if (inSubroutine())
// throw error("Subroutines do not support backreferences", matcher.start(group));
trailingDigits += this.matcher.group(digitGroup);
// retrieve the actual group index for the specified groupIndex
Integer mappedIndexI;
// replace back reference with back reference RegEx
if (isAnyGroup(mappingName)) {
String groupName = anyGroupName(mappingName);
if (groupName.equals("[0]")) {
throw this.error(ZERO_REFERENCE, start);
}
// NOTE(review): the conditional counterpart uses this.pattern.getGroupCount(groupName) here;
// confirm which count is intended
int groupCount = this.getGroupCount(groupName);
if (groupCount == 0) {
// form 0 is \n (for unnamed group)
if (isNamedGroup || form != 0 || this.has(VERIFY_GROUPS)) {
throw this.error(NONEXISTENT_SUBPATTERN, start);
}
// Used to mimic java functionality
// (required because otherwise group could point to a valid capture group, due to added groups)
// !isNamedGroup, form == 0 ('\n' syntax), and !has(VERIFY_GROUPS)
this.replaceWith(fail() + trailingDigits);
} else if (groupCount == 1) {
String tmpMappingName = getMappingName(groupName, 1);
trailingDigits = RefactorUtility.fixTrailing(trailingDigits);
// NOTE(review): groupCount was read from the same call above, so this comparison
// looks always true and the "occurs later" branch unreachable - TODO confirm
if (this.getGroupCount(groupName) == groupCount) {
// group has already occurred
this.replaceWith(this.acceptGroup(tmpMappingName) + trailingDigits);
} else {
// group occurs later on
this.replaceWith(
"\\g{" + this.addErrorTrace(start) + "group-" + tmpMappingName + "}" + trailingDigits);
}
} else if (this.allDone(groupName, groupCount)) {
// all groups have already occurred
String acceptGroup = this.acceptGroup(mappingName);
this.replaceWith(nonCaptureGroup(acceptGroup) + trailingDigits);
} else {
// some groups occur later on
this.replaceWith(nonCaptureGroup("\\g{" + this.addErrorTrace(start) + "group-" + mappingName + "}")
+ trailingDigits);
}
} else if ((mappedIndexI = this.pattern.getMappedIndex(mappingName)) == null) {
// no mapped index: the referenced group does not exist
throw this.error(NONEXISTENT_SUBPATTERN, start);
} else {
int mappedIndex = mappedIndexI;
trailingDigits = RefactorUtility.fixTrailing(trailingDigits);
if (mappedIndex == TARGET_UNKNOWN) {
// group hasn't occurred yet: emit a "group-" placeholder reference
this.replaceWith("\\g{" + this.addErrorTrace(start) + "group-" + mappingName + "}" + trailingDigits);
} else {
// group already occurred
this.replaceWith("\\" + mappedIndex + trailingDigits);
}
}
}
/**
* Indicates whether all groups with the specified group name have
* already appeared
*
* @param groupName
* the group name
* @param groupCount
* the total number of groups with the specified name
* @return <code>true</code> if, and only if, all groups with the given
* name have already appeared
*/
private boolean allDone(final String groupName, final int groupCount) {
    // mapping name of the groupCount-th occurrence of the group
    final String lastOccurrence = getMappingName(groupName, groupCount);

    // done exactly when a testing group is registered for that occurrence
    final boolean hasTestingGroup = this.getTestingGroup(lastOccurrence) != null;
    return hasTestingGroup;
}
/**
* Returns a <code>MatchResult</code> containing data about the back
* reference.
*
* @param backreference
* a string (of numbers) that make up a back reference
* @param start
* the start index for the back reference (used in any thrown
* exceptions)
* @return a <code>MatchResult</code> containing data about the back
* reference, or <code>null</code> if <code>backreference</code> doesn't
* refer to a back reference
*/
private java.util.regex.MatchResult getBackreference(final String backreference, final int start) {
    if (backreference.charAt(0) == '0') {
        // a leading '0' always denotes an octal escape; it is kept only with PERL_OCTAL
        final String octalInput = this.has(PERL_OCTAL) ? backreference : backreference.substring(1);

        // +1 because leading '0'
        this.replaceWithOctalEscape(octalInput, start + 1);
        return null;
    }

    if (this.inCharClass()) {
        if (this.has(PERL_OCTAL)) {
            this.replaceWithOctalEscape(backreference, start);
        }

        // otherwise: ignore back reference in character class
        return null;
    }

    final int digitCount = RefactorUtility.digitCount(this.currentGroup);
    final java.util.regex.Matcher digits = getDigitCountPattern(digitCount).matcher(backreference);

    // the result is not checked; group(...) below fails if the pattern did not match
    digits.matches();

    final int groupIndex = Integer.parseInt(digits.group(1));
    final String trailing = digits.group(2);

    final boolean tooLargeForGroup = digitCount > 1 && groupIndex > this.currentGroup;

    if (this.has(PERL_OCTAL) && (trailing.length() != 0 || tooLargeForGroup)) {
        // an octal escape
        this.replaceWithOctalEscape(backreference, start);
        return null;
    }

    return digits.toMatchResult();
}

/**
 * Replaces the current match with the hex form of the octal escape in
 * <code>input</code>, followed by whatever trails the octal digits.
 *
 * @param input
 *            the digits to parse with the <code>perl_octal</code> pattern
 * @param errorLoc
 *            the index reported if <code>input</code> is not a legal octal escape
 */
private void replaceWithOctalEscape(final String input, final int errorLoc) {
    final java.util.regex.Matcher octalMatcher = perl_octal.matcher(input);

    if (!octalMatcher.matches()) {
        throw this.error(ILLEGAL_OCTAL_ESCAPE, errorLoc);
    }

    final String octal = octalMatcher.group(1);
    final String trailing = octalMatcher.group(2);
    final int octalCode = Integer.parseInt(octal, 8);

    this.replaceWith(String.format(hexCodeFormat, octalCode) + trailing);
}
/**
* Refactors an assert condition during the pre-refactoring step
*/
private void preRefactorAssertCondition() {
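/*
* matches an assert condition
* "(?(?=)", "(?(?!)", "(?(?<=)", or "(?(?<!)"
*
* NOTE(review): text appears to have been lost at this point in this copy
* of the file (the rest of this comment, and probably the end of this
* method and the header of its refactoring-step counterpart) - restore
* from the upstream source.
*/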
// "(?:(?:(assert)())?+(?:(?:\m)then|(?!\m)else))"
// (conversion works in PCRE, but only partly in Java)
// does not work during repetition
// TODO: verify conversions work in PCRE and Java
this.replaceWith(startNonCaptureGroup() + startNonCaptureGroup() + "(" + this.matcher.group(this.group));
// increase parenthesis depth to that of the assertion
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
this.handleEndAssertCond.add(new MatchState("", this.parenthesisDepth, TARGET_UNKNOWN));
}
/**
* Returns the mapping name associated with the event to end an assert
* condition
*
* @return the mapping name used to mark the end of an assert condition
*/
private String endAssertCondMappingName() {
    // fixed marker name; callers compare mapping names against it
    final String marker = "$endAssertCond";
    return marker;
}
/**
* Initializes a "branch reset" subpattern during the pre-refactoring
* step.
*/
private void preRefactorBranchReset() {
    // depth is increased first so the state records the depth inside the subpattern
    this.increaseParenthesisDepth(DURING_PREREFACTOR);

    final BranchResetState state = new BranchResetState(this.currentGroup, this.unnamedGroup,
            this.parenthesisDepth);
    this.branchReset.push(state);
}
/**
* Initializes a "branch reset" subpattern during the refactoring step.
*/
private void refactorBranchReset() {
    // the "branch reset" opening becomes a plain non-capture group
    final String opening = startNonCaptureGroup();
    this.replaceWith(opening);

    // depth is increased first so the state records the depth inside the subpattern
    this.increaseParenthesisDepth(!DURING_PREREFACTOR);

    final BranchResetState state = new BranchResetState(this.currentGroup, this.unnamedGroup,
            this.parenthesisDepth);
    this.branchReset.push(state);
}
/**
* Refactors a numeric range during the refactoring step
*/
private void refactorNumericRange() {
    final int modeGroup = this.group;

    final String mode = this.matcher.group(modeGroup);

    // a bound is inclusive when its marker group (modeGroup + 1 / + 3) did not participate
    final boolean inclusiveStart = this.matcher.start(modeGroup + 1) == -1;
    final String rangeStart = this.matcher.group(modeGroup + 2);
    final boolean inclusiveEnd = this.matcher.start(modeGroup + 3) == -1;
    final String rangeEnd = this.matcher.group(modeGroup + 4);
    final int endRange = this.matcher.end(modeGroup + 4);

    if (rangeStart == null || rangeEnd == null) {
        // error is set at character after "["
        throw this.error(NUMERIC_RANGE_EXPECTED, this.matcher.end(modeGroup) + 1);
    }

    final boolean rangeClosed = endRange < this.text.length() && this.text.charAt(endRange) == ']';

    if (!rangeClosed) {
        throw this.error(UNCLOSED_RANGE, endRange);
    }

    final boolean groupClosed = this.match.endsWith(")");

    if (!groupClosed) {
        // error is set at character after "]"
        throw this.error(UNCLOSED_GROUP, endRange + 1);
    }

    // TODO: add bug testing
    final String range = PatternRange.boundedRange(rangeStart, inclusiveStart, rangeEnd, inclusiveEnd,
            new RangeMode(mode));

    this.replaceWith(nonCaptureGroup(range));
}
/**
* Refactors the hex unicode during the after-refactoring step
*/
private void afterRefactorHexUnicode() {
    final int radix = 16;

    try {
        final int codePoint = Integer.parseInt(this.matcher.group(this.group), radix);
        final String replacement;

        if (codePoint <= 0xFF) {
            // small values use the two-digit hex form
            replacement = String.format(hexCodeFormat, codePoint);
        } else if (Character.charCount(codePoint) == 1) {
            // a single char is enough to represent the code point
            replacement = String.format(unicodeFormat, codePoint);
        } else {
            // needs two chars: insert the characters themselves
            replacement = new String(Character.toChars(codePoint));
        }

        this.replaceWith(replacement);
    } catch (RuntimeException e) {
        // any runtime failure above is reported as an invalid hex code
        throw this.error(INVALID_HEX_CODE, this.matcher.start());
    }
}
/**
* Refactors the hex char during the after-refactoring step
*/
private void afterRefactorHexChar() {
    final String digits = this.matcher.group(this.group);

    if (digits.length() != 1) {
        // nothing to pad
        return;
    }

    // matched "\xh"
    // add leading 0 (necessary for java syntax)
    this.replaceWith("\\x0" + digits);
}
/**
* Refactors a unicode character during the after-refactoring step
*/
private void afterRefactorUnicode() {
    final String digits = this.matcher.group(this.group);
    final StringBuilder padded = new StringBuilder("\\u");

    // left-pad with zeros up to four digits
    int missing = 4 - digits.length();

    while (missing > 0) {
        padded.append('0');
        missing--;
    }

    padded.append(digits);
    this.replaceWith(padded.toString());
}
/**
* Refactors a posix class during the after-refactoring step
*/
private void afterRefactorPosixClass() {
    if (!this.inCharClass()) {
        throw this.error(POSIX_OUTSIDE_CLASS, this.matcher.start());
    }

    // a non-empty first group marks the class as negated
    final boolean negated = this.matcher.group(this.group).length() != 0;
    final String className = this.matcher.group(this.group + 1);

    if (className.equals("word")) {
        // "word" maps onto the predefined word classes
        this.replaceWith(negated ? "\\W" : "\\w");
        return;
    }

    final String property = posixClasses.get(className);

    if (property == null) {
        throw this.error(UNKNOWN_POSIX_CLASS, this.matcher.start(this.group + 1));
    }

    final String escape = negated ? "P" : "p";
    this.replaceWith("\\" + escape + "{" + property + "}");
}
/**
* Refactors a control character during the after-refactoring step
*/
private void afterRefactorControlCharacter() {
    final char control = this.matcher.group(this.group).charAt(0);

    // lower-case letters are measured from 'a', everything else from 'A'
    final boolean lowerCase = control >= 'a' && control <= 'z';
    final int base = (lowerCase ? 'a' : 'A') - 1;

    this.replaceWith(String.format(hexCodeFormat, control - base));
}
/**
* Refactors the remaining parts during the pre-refactoring step
*/
private void preRefactorOthers() {
    final String token = this.match;
    final boolean outsideCharClass = !this.inCharClass();

    if (token.equals("(")) {
        if (outsideCharClass) {
            this.increaseParenthesisDepth(DURING_PREREFACTOR);
        }
    } else if (token.equals(")")) {
        if (outsideCharClass) {
            this.preRefactorCloseParenthesis();
        }
    } else if (token.equals("|")) {
        if (outsideCharClass) {
            this.preRefactorPike();
        }
    } else if (token.equals("[")) {
        this.increaseCharClassDepth();
    } else if (token.equals("]")) {
        this.decreaseCharClassDepth();
    } else if (token.equals("{")) {
        // Tracked to prevent Android issue, where '}' must be escaped, but not in Java
        // Only track if not in character class (Android allows '}' in character group)
        if (outsideCharClass) {
            // not in a brace -> now in one; otherwise (already open, or already
            // unknown) the flag becomes / stays null
            this.isInCurlyBrace = Boolean.FALSE.equals(this.isInCurlyBrace) ? Boolean.TRUE : null;
        }
    } else if (token.equals("}")) {
        // Same Android tracking as for '{'; a null flag means nothing is done
        final Boolean braceState = this.isInCurlyBrace;

        if (outsideCharClass && braceState != null) {
            if (braceState.booleanValue()) {
                // An open curly brace, followed at some point by a close one
                this.isInCurlyBrace = false;
            } else {
                // A closed curly brace by itself (escape it in the internal pattern)
                this.replaceWith("\\}");
            }
        }
    } else if (token.equals("#")) {
        if (this.has(COMMENTS) && outsideCharClass) {
            this.handleStartComments();
        }
    } else {
        if (token.startsWith("\\Q") && this.isJava1_5) {
            // replace the quote block with an individually escaped literal
            final int contentStart = 2;
            final int contentEnd = token.length() - (token.endsWith("\\E") ? 2 : 0);
            this.replaceWith(this.literal(token.substring(contentStart, contentEnd)));
        }

        // quote blocks and all remaining tokens may end a comment
        if (this.isInComments) {
            this.checkForLineTerminator();
        }
    }
}
/**
* Refactors the remaining parts during the refactoring step
*/
private void refactorOthers() {
    final String token = this.match;
    final boolean outsideCharClass = !this.inCharClass();

    if (token.equals("(")) {
        if (outsideCharClass) {
            this.increaseParenthesisDepth(!DURING_PREREFACTOR);
        }
    } else if (token.equals(")")) {
        if (outsideCharClass) {
            this.refactorCloseParenthesis();
        }
    } else if (token.equals("|")) {
        if (outsideCharClass) {
            this.refactorPike();
        }
    } else if (token.equals("[")) {
        this.increaseCharClassDepth();
    } else if (token.equals("]")) {
        this.decreaseCharClassDepth();
    } else if (token.equals("#")) {
        if (this.has(COMMENTS) && outsideCharClass) {
            this.handleStartComments();
        }
    } else if (this.isInComments) {
        // every other token (quote blocks included) may end a comment
        this.checkForLineTerminator();
    }
}
/**
* Refactors the remaining parts (after the refactoring step)
*/
private void afterRefactorOthers() {
    final String token = this.match;
    final boolean outsideCharClass = !this.inCharClass();

    if (token.equals("(")) {
        if (outsideCharClass) {
            this.increaseParenthesisDepth(!DURING_PREREFACTOR);
        }
    } else if (token.equals(")")) {
        if (outsideCharClass) {
            this.afterRefactorCloseParenthesis();
        }
    } else if (token.equals("[")) {
        this.increaseCharClassDepth();
    } else if (token.equals("]")) {
        this.decreaseCharClassDepth();
    } else if (token.equals("#")) {
        if (this.has(COMMENTS) && outsideCharClass) {
            this.handleStartComments();
        }
    } else if (token.equals("\\X")) {
        this.replaceWith("(?>\\P{M}\\p{M}*)");
    } else if (this.isInComments) {
        // quote blocks ("\Q...") are skipped; like any other remaining token
        // they only need the end-of-comment check
        this.checkForLineTerminator();
    }
}
/**
* Pattern to match the end of a quote block (either a "\E" or the end of the regex)
*/
// private static final java.util.regex.Pattern endQuoteBlockPattern = java.util.regex.Pattern.compile("\\\\E|$");
// /**
// * Skips from \Q to \E. If there is no \E the rest of the string is
// * skipped.
// */
// private void skipQuoteBlock()
// {
// // Store the current pattern (restored at end)
// java.util.regex.Pattern currentPattern = matcher.pattern();
//
// // Find the end of the quote block (thus skipping it)
// matcher.usePattern(endQuoteBlockPattern).find();
//
// // Replace the previous pattern
// matcher.usePattern(currentPattern);
// }
/**
* Returns a literal pattern <code>String</code> for the specified
* <code>String</code>.
*
* <p>This method produces a <code>String</code> that can be used to create
* a <code>Pattern</code> that would match the string <code>s</code> as if
* it were a literal pattern.</p>
*
* <p>Metacharacters or escape sequences in the input sequence will be given
* no special meaning.</p>
*
* <p><b>Note</b>: this function escapes each metacharacter individually,
* whereas {@link Pattern#quote(String)} uses a <code>\Q..\E</code> block. This
* function is used when refactoring a <code>\Q..\E</code> block into a
* RegEx patternSyntax that doesn't support the functionality.</p>
*
* @param s
* The string to be literalized
* @return A literal string replacement
*/
public String literal(final String s) {
    if (this.inCharClass()) {
        // inside a character class a different set of metacharacters applies
        return Pattern.literal(s, Pattern.REGEX_CHAR_CLASS_METACHARACTERS);
    }

    return Pattern.literal(s, Pattern.REGEX_METACHARACTERS);
}
/**
* Refactors a close parenthesis during the pre-refactoring step
*/
private void preRefactorCloseParenthesis() {
    final boolean nothingToClose = this.parenthesisDepth == 0;

    if (nothingToClose) {
        throw this.error(UNMATCHED_PARENTHESES, this.matcher.start());
    }

    final boolean closesConditional = this.atRightDepth(this.handleElseBranch);

    if (closesConditional) {
        // the conditional ends here; drop its state
        this.handleElseBranch.pop();
    } else if (this.atRightDepth(this.branchReset)) {
        this.endBranchReset();
    }

    // done last, after the depth-based checks above
    this.decreaseParenthesisDepth(DURING_PREREFACTOR);
}
/**
* Refactors a close parenthesis during the refactoring step
*/
private void refactorCloseParenthesis() {
if (this.atRightDepth(this.addTestGroup)) {
// closing a group that needs a test group: append an empty capture group "()"
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
this.replaceWith(")())");
// the appended "()" is a new group in the internal pattern
this.totalGroups++;
String mappingName = this.addTestGroup.peek().mappingName;
String namedMappingName = this.addTestGroup.peek().namedMappingName;
// add a mapping from mapping name to its test condition group
this.addTestingGroup(mappingName, this.totalGroups);
if (namedMappingName != null) {
this.addTestingGroup(namedMappingName, this.totalGroups);
}
this.addTestGroup.pop();
// done last because inside pars is same depth
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
} else if (this.atRightDepth(this.handleElseBranch)) {
// no else branch for condition (i.e. only a "then" branch)
// e.g. (?('name')...)
// add an empty else branch
// if condition isn't true, matches the empty string
Integer testConditionGroupI = this.handleElseBranch.peek().testConditionGroupI;
if (testConditionGroupI == null) {
// the specified group doesn't exist,
// always use else branch
this.replaceWith("|)");
} else {
int testConditionGroup = testConditionGroupI;
if (testConditionGroup == TARGET_UNKNOWN) {
String mappingName = this.handleElseBranch.peek().mappingName;
// the specified group exists, but occurs later: emit a "testF-" placeholder
this.replaceWith("|\\g{" + this.addErrorTrace(this.matcher.start(this.group)) + "testF-"
+ mappingName + "})");
} else {
// the specified group has already occurred
String mappingName = this.handleElseBranch.peek().mappingName;
StringBuilder replacement = new StringBuilder();
replacement.append('|');
if (testConditionGroup == 0) {
// 0 marks an "any group" condition, resolved by mapping name
replacement.append(this.failTestingGroup(mappingName));
} else {
replacement.append(RefactorUtility.failTestingGroup(testConditionGroup));
}
replacement.append(')');
if (mappingName.equals(this.endAssertCondMappingName())) {
// an assert condition has one more enclosing group to close
replacement.append(')');
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
}
this.replaceWith(replacement.toString());
}
}
// remove this pike state
this.handleElseBranch.pop();
// done last because inside pars is same depth
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
} else if (this.atRightDepth(this.branchReset)) {
this.endBranchReset();
// done last because inside pars is same depth
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
} else if (this.atRightDepth(this.handleEndAssertCond)) {
String mappingName = this.handleEndAssertCond.peek().mappingName;
if (mappingName.equals(this.endAssertCondMappingName())) {
// the end of an assert condition
this.replaceWith("))");
// adjust parenthesis depth
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
// remove the state
this.handleEndAssertCond.pop();
} else {
// end of the assertion itself: close it with an added test group and
// open the group that holds the branches
this.totalGroups++;
this.replaceWith(
")())?+" + startNonCaptureGroup() + RefactorUtility.acceptTestingGroup(this.totalGroups));
// adjust parenthesis depth
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
this.increaseParenthesisDepth(!DURING_PREREFACTOR);
// add state to handle else branch
this.handleElseBranch
.add(new MatchState(this.endAssertCondMappingName(), this.parenthesisDepth, this.totalGroups));
// remove the state
this.handleEndAssertCond.pop();
}
} else {
// an ordinary group: only the depth changes
this.decreaseParenthesisDepth(!DURING_PREREFACTOR);
}
}
/**
* Refactors a close parenthesis during the after-refactoring step
*/
private void afterRefactorCloseParenthesis() {
    // this step runs after pre-refactoring, hence the negated flag
    final boolean duringPreRefactor = !DURING_PREREFACTOR;
    this.decreaseParenthesisDepth(duringPreRefactor);
}
/**
* Steps to perform at the end (closing parenthesis) of a "branch reset"
* subpattern.
*/
private void endBranchReset() {
    final BranchResetState state = this.branchReset.peek();

    // continue numbering after the largest group reached by the subpattern
    this.currentGroup = Math.max(this.currentGroup, state.endGroup);
    this.unnamedGroup = Math.max(this.unnamedGroup, state.endUnnamedGroup);

    this.branchReset.pop();
}
/**
* Refactors a pike during the pre-refactoring step
*/
private void preRefactorPike() {
    if (this.atRightDepth(this.handleElseBranch)) {
        final MatchState conditional = this.handleElseBranch.peek();

        // the field holds the current depth once a pike was seen for this conditional
        if (conditional.testConditionGroupI == this.parenthesisDepth) {
            throw this.error(CONDITIONAL_BRANCHES, this.matcher.start(this.group));
        }

        // mark that this conditional now has its pike
        conditional.testConditionGroupI = this.parenthesisDepth;
        return;
    }

    if (this.atRightDepth(this.branchReset)) {
        this.branchReset();
    }
}
/**
* Refactors a pike during the refactoring step
*/
private void refactorPike() {
    if (!this.atRightDepth(this.handleElseBranch)) {
        if (this.atRightDepth(this.branchReset)) {
            this.branchReset();
        }

        return;
    }

    final MatchState conditional = this.handleElseBranch.peek();
    final Integer testGroup = conditional.testConditionGroupI;
    final String name = conditional.mappingName;

    // a null test group means the specified group doesn't exist
    // (i.e. always use else branch) - dealt with elsewhere
    if (testGroup != null) {
        final int testGroupIndex = testGroup;
        final String elseCondition;

        if (testGroupIndex == TARGET_UNKNOWN) {
            // the specified group exists, but occurs later
            elseCondition = "\\g{" + this.addErrorTrace(this.matcher.start(this.group)) + "testF-" + name + "}";
        } else if (testGroupIndex != 0) {
            // specific group
            elseCondition = RefactorUtility.failTestingGroup(testGroupIndex);
        } else {
            // any group
            elseCondition = this.failTestingGroup(name);
        }

        this.replaceWith("|" + elseCondition);
    }

    this.handleElseBranch.pop();

    if (name.equals(this.endAssertCondMappingName())) {
        // an assert condition still needs its closing parenthesis handled
        this.handleEndAssertCond.add(new MatchState(name, this.parenthesisDepth, testGroup));
    }
}
/**
* Actions to take when matching the start of a character class
*/
private void increaseCharClassDepth() {
    this.charClassDepth++;

    final int next = this.matcher.end();
    final int length = this.text.length();

    // "[]" : the "]" directly follows the "["
    final boolean literalBracket = next < length && this.text.charAt(next) == ']';

    // "[^]" : the "]" directly follows the negation
    final boolean negatedLiteralBracket = next < length - 2
            && this.text.charAt(next) == '^'
            && this.text.charAt(next + 1) == ']';

    if (!literalBracket && !negatedLiteralBracket) {
        return;
    }

    // don't count that "]" as the end of the character class:
    // the extra level is removed again when the "]" is matched
    this.charClassDepth++;
}
/**
* Actions to take when matching the end of a character class
*/
private void decreaseCharClassDepth() {
    if (!this.inCharClass()) {
        // not in a character class: the "]" is literal
        return;
    }

    this.charClassDepth--;
}
/**
* Indicates whether currently in a character class.
*
* @return <code>true</code>, if in a character class
*/
private boolean inCharClass() {
    // any non-zero depth counts as being inside a character class
    final boolean outside = this.charClassDepth == 0;
    return !outside;
}
// /**
// * Pattern to match a unix line separator (or the end of the pattern)
// */
// private static final java.util.regex.Pattern unixLineSeparatorPattern = java.util.regex.Pattern.compile("\\n++|$");
// /**
// * Pattern to match a line separator (based on line separators specified at
// * http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#lt); also matches the end of the pattern
// */
// private static final java.util.regex.Pattern lineSeparatorPattern = java.util.regex.Pattern
// .compile("[\\n\\r\u0085\u2028\u2029]++|$");
/**
* Sets the parser "cursor" at the end of the current line.
*/
// private void parsePastLine()
// {
// // Save the current pattern (restored at end)
// java.util.regex.Pattern currentPattern = matcher.pattern();
//
// // Causes issues in Android (for some unknown reason)
// // (causes garbage collection to run wild if using comments
// // if comments contain named capture group, incorrectly thinks there's two)
// // (XXX: something is really up)
// if (has(UNIX_LINES))
// {
// matcher.usePattern(unixLineSeparatorPattern);
// }
// else
// {
// matcher.usePattern(lineSeparatorPattern);
// }
//
// // Skip past the current line
// matcher.find();
//
// // Restore the previous pattern
// matcher.usePattern(currentPattern);
// }
/**
* Handle the start comments "#" when {@link Pattern#COMMENTS} is enabled
*/
private void handleStartComments() {
// cleared again by checkForLineTerminator() once a line terminator is matched
this.isInComments = true;
}
/**
* Check for a line terminator and if one is found, end the comment block
*/
private void checkForLineTerminator() {
    // with UNIX_LINES only "\n" ends a line; otherwise any of [\n\r\u0085\u2028\u2029]
    final String terminators = this.has(UNIX_LINES) ? "\n" : "\n\r\u0085\u2028\u2029";

    for (int i = 0; i < terminators.length(); i++) {
        if (this.match.indexOf(terminators.charAt(i)) >= 0) {
            // the comment ends with the line
            this.isInComments = false;
            return;
        }
    }
}
/**
* Steps taken when entering a new branch in a "branch reset"
* subpattern.
*/
private void branchReset() {
    final BranchResetState state = this.branchReset.peek();

    // remember how far the finished branch got ...
    state.updateEndGroup(this.currentGroup, this.unnamedGroup);

    // ... then restart numbering for the new branch
    this.currentGroup = state.startGroup;
    this.unnamedGroup = state.unnamedGroup;
}
/**
* Indicates whether currently in a "branch reset" pattern
*
* @return <code>true</code> if, and only if, currently in a
* "branch reset" pattern
*/
private boolean inBranchReset() {
    // a pending state means a "branch reset" subpattern is still open
    final boolean noneOpen = this.branchReset.isEmpty();
    return !noneOpen;
}
/**
* Replace the matched string with the specified (literal)
* replacement, and adds a new state to {@link #differences}.
*
*
* @param replacement
* the replacement
*/
private void replaceWith(final String replacement) {
    // quote the replacement so that it is appended literally
    String quoteReplacement = Matcher.quoteReplacement(replacement);
    this.matcher.appendReplacement(this.result, quoteReplacement);

    // The text that was appended is the literal replacement, not its quoted
    // form. The quoted form is longer whenever an escape was added, so the
    // start index must be measured with the length of the replacement itself.
    int start = this.result.length() - replacement.length();

    this.differences.replace0(start, this.match, replacement);
}
/**
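* State recorded for a subpattern: a mapping name, the parenthesis depth
* at which the state applies, and a test condition group.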
*/
private static class MatchState implements State {
    /** Mapping name of the group this state refers to. */
    String mappingName;

    /** Parenthesis depth at which this state applies. */
    int parenthesisDepth;

    /** Test condition group (may be <code>null</code>). */
    Integer testConditionGroupI;

    /**
     * Creates a state from its three components.
     *
     * @param mappingName
     *            the mapping name for the group
     * @param parenthesisDepth
     *            the parenthesis depth for the group
     * @param testConditionGroup
     *            the test condition group
     */
    MatchState(final String mappingName, final int parenthesisDepth, final Integer testConditionGroup) {
        this.testConditionGroupI = testConditionGroup;
        this.parenthesisDepth = parenthesisDepth;
        this.mappingName = mappingName;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getParenthesisDepth() {
        return this.parenthesisDepth;
    }

    /**
     * Returns a string useful for debugging
     *
     * @return a string representation of this state
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append(this.mappingName);
        text.append(" -> (").append(this.testConditionGroupI).append("): ");
        text.append(this.parenthesisDepth);
        return text.toString();
    }
}
/**
 * State of a "branch reset" subpattern: the group counts at its start,
 * its parenthesis depth, and the highest group counts reported so far
 * through {@link #updateEndGroup(int, int)}.
 */
private static class BranchResetState implements State {
    /** The start group. */
    int startGroup;

    /** Number of unnamed groups at the start of the subpattern. */
    int unnamedGroup;

    /** The parenthesis depth. */
    int parenthesisDepth;

    /** The end group; <code>-1</code> until the first update. */
    int endGroup = -1;

    /** The end number of unnamed groups; <code>-1</code> until the first update. */
    int endUnnamedGroup = -1;

    /**
     * @param startGroup
     *            the start group for the "branch reset" subpattern
     * @param unnamedGroups
     *            the current number of unnamed groups
     * @param parenthesisDepth
     *            the parenthesis depth for the subpattern
     */
    BranchResetState(final int startGroup, final int unnamedGroups, final int parenthesisDepth) {
        this.startGroup = startGroup;
        this.unnamedGroup = unnamedGroups;
        this.parenthesisDepth = parenthesisDepth;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getParenthesisDepth() {
        return this.parenthesisDepth;
    }

    /**
     * Updates the end group of a "branch reset" subpattern.
     *
     * <p>Each end value is only updated if the specified value is greater
     * than the one currently stored.</p>
     *
     * @param group
     *            the new end group
     * @param unnamed
     *            the new unnamed end group
     */
    void updateEndGroup(final int group, final int unnamed) {
        this.endGroup = Math.max(this.endGroup, group);
        this.endUnnamedGroup = Math.max(this.endUnnamedGroup, unnamed);
    }

    /**
     * Returns a string useful for debugging, listing every field of this
     * state.
     *
     * @return a string representation of this state
     */
    @Override
    public String toString() {
        return "BranchResetState[startGroup=" + this.startGroup
                + ", unnamedGroup=" + this.unnamedGroup
                + ", endGroup=" + this.endGroup
                + ", endUnnamedGroup=" + this.endUnnamedGroup
                + ", parenthesisDepth=" + this.parenthesisDepth + "]";
    }
}
/**
 * State holding a mapping name, the corresponding named mapping name,
 * and a parenthesis depth.
 */
private static class AddTestGroupState implements State {
    /** The mapping name. */
    String mappingName;

    /** The named mapping name. */
    String namedMappingName;

    /** The parenthesis depth. */
    int parenthesisDepth;

    /**
     * Creates a state from the three values it stores.
     *
     * @param mappingName
     *            the mapping name
     * @param parenthesisDepth
     *            the parenthesis depth
     * @param namedMappingName
     *            the named mapping name
     */
    AddTestGroupState(final String mappingName, final int parenthesisDepth, final String namedMappingName) {
        // Assigned in parameter order; the assignments are independent of each other
        this.mappingName = mappingName;
        this.parenthesisDepth = parenthesisDepth;
        this.namedMappingName = namedMappingName;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getParenthesisDepth() {
        return this.parenthesisDepth;
    }
}
/**
* The Interface State.
*/
private static interface State {
/**
* Gets the parenthesis depth.
*
* @return the parenthesis depth
*/
public int getParenthesisDepth();
}
}