// com.tidb.jdbc.conf.StringInspector Maven / Gradle / Ivy
// The newest version!
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 2.0, as published by the
* Free Software Foundation.
*
* This program is also distributed with certain software (including but not
* limited to OpenSSL) that is licensed under separate terms, as designated in a
* particular file or component or in included license documentation. The
* authors of MySQL hereby grant you an additional permission to link the
* program and your derivative works with the separately licensed software that
* they have included with MySQL.
*
* Without limiting anything contained in the foregoing, this file, which is
* part of MySQL Connector/J, is also subject to the Universal FOSS Exception,
* version 1.0, a copy of which can be found at
* http://oss.oracle.com/licenses/universal-foss-exception.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
* for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.tidb.jdbc.conf;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
/**
* Utility class to inspect a MySQL string, typically a query string.
*
* Provides string searching and manipulation operations such as finding sub-strings, matching sub-strings or building a comments-free version of a string.
*
* This object keeps internal state that affects the operations, e.g., executing an indexOf operation after another causes the second to start the search from
* where the previous one stopped.
*/
public class StringInspector {
// Number of digits in the MySQL version reference found in comments of type '/*![00000] */'.
private static final int NON_COMMENTS_MYSQL_VERSION_REF_LENGTH = 5;

// Inputs received by the constructor; nothing else in this class reassigns them.
private String source;
private String openingMarkers;
private String closingMarkers;
private String overridingMarkers;
private Set defaultSearchMode;
private int srcLen;

// Current processing state: read position, exclusive stop position, whether the current character is backslash-escaped and whether the
// position is inside a MySQL-specific "/*! ... */" block.
private int pos;
private int stopAt;
private boolean escaped;
private boolean inMysqlBlock;

// Snapshot of the processing state, written by mark() and read back by reset(). Like the state above, it starts at 0/false.
private int markedPos;
private int markedStopAt;
private boolean markedEscape;
private boolean markedInMysqlBlock;
/**
* Creates an inspector that processes the given source string starting at position 0. Delegates to the constructor that takes an explicit starting
* position.
*
* @param source
* the source string to process
* @param openingMarkers
* the characters that can delimit the beginning of a quoted text block
* @param closingMarkers
* the characters that can delimit the end of a quoted text block. The length of this parameter must match the length of the previous one
* @param overridingMarkers
* the subset of {@code openingMarkers} that override the remaining markers, e.g., if {@code openingMarkers = "'("},
* {@code closingMarkers = "')"} and {@code overridingMarkers = "'"} then the block between the outer parenthesis in
* {@code "start ('max('); end"} is strictly consumed, otherwise the suffix {@code "'); end"} would end up being consumed too in the process of
* handling the nested parenthesis
* @param searchMode
* a {@code Set}, ideally an {@code EnumSet}, containing the default flags from the enum {@link SearchMode} that determine the behavior
* of the search and manipulation operations. Note that some operations may override the search mode according to their needs, but the default
* one is not touched
*/
public StringInspector(String source, String openingMarkers, String closingMarkers, String overridingMarkers, Set searchMode) {
this(source, 0, openingMarkers, closingMarkers, overridingMarkers, searchMode);
}
/**
 * Creates an inspector that processes the given source string starting at the given position.
 *
 * @param source
 *            the source string to process
 * @param startingPosition
 *            the position from where the source string will be processed
 * @param openingMarkers
 *            the characters that can delimit the beginning of a quoted text block
 * @param closingMarkers
 *            the characters that can delimit the end of a quoted text block. The length of this parameter must match the length of the previous
 *            one
 * @param overridingMarkers
 *            the subset of {@code openingMarkers} that override the remaining markers, e.g., if {@code openingMarkers = "'("},
 *            {@code closingMarkers = "')"} and {@code overridingMarkers = "'"} then the block between the outer parenthesis in
 *            {@code "start ('max('); end"} is strictly consumed, otherwise the suffix {@code "'); end"} would end up being consumed too in the
 *            process of handling the nested parenthesis
 * @param searchMode
 *            a {@code Set}, ideally an {@code EnumSet}, containing the default flags from the enum {@link SearchMode} that determine the behavior
 *            of the search and manipulation operations. Note that some operations may override the search mode according to their needs, but the
 *            default one is not touched. The set is copied, so later changes to the caller's set do not affect this object
 * @throws IllegalArgumentException
 *             if {@code source} or {@code searchMode} is null, if {@link SearchMode#SKIP_BETWEEN_MARKERS} is requested and the markers are
 *             null or inconsistent, or if {@code startingPosition} is negative or beyond the end of the source string
 */
public StringInspector(String source, int startingPosition, String openingMarkers, String closingMarkers, String overridingMarkers,
        Set<SearchMode> searchMode) {
    if (source == null) {
        throw new IllegalArgumentException("The source string must not be null.");
    }
    if (searchMode == null) {
        throw new IllegalArgumentException("The search mode must not be null.");
    }

    this.source = source;
    this.openingMarkers = openingMarkers;
    this.closingMarkers = closingMarkers;
    this.overridingMarkers = overridingMarkers;

    // Keep a private EnumSet copy: it isolates this object from later changes to the caller's set and guarantees that the
    // EnumSet.copyOf() calls made on the default search mode elsewhere in this class work even when the given set is empty.
    Set<SearchMode> searchModeCopy = EnumSet.noneOf(SearchMode.class);
    searchModeCopy.addAll(searchMode);
    this.defaultSearchMode = searchModeCopy;

    // Markers are only validated when they are going to be used for skipping text.
    if (this.defaultSearchMode.contains(SearchMode.SKIP_BETWEEN_MARKERS)) {
        if (this.openingMarkers == null || this.closingMarkers == null || this.openingMarkers.length() != this.closingMarkers.length()) {
            String markersMessage = "Illegal argument value {0} for openingMarkers and/or {1} for closingMarkers. These cannot be null and must have the same length.";
            throw new IllegalArgumentException(MessageFormat.format(markersMessage, this.openingMarkers, this.closingMarkers));
        }

        String overridingMarkersMessage = "Illegal argument value {0} for overridingMarkers. These cannot be null and must be a sub-set of openingMarkers {1}.";
        if (this.overridingMarkers == null) {
            throw new IllegalArgumentException(MessageFormat.format(overridingMarkersMessage, this.overridingMarkers, this.openingMarkers));
        }
        for (char c : this.overridingMarkers.toCharArray()) {
            if (this.openingMarkers.indexOf(c) == -1) {
                throw new IllegalArgumentException(MessageFormat.format(overridingMarkersMessage, this.overridingMarkers, this.openingMarkers));
            }
        }
    }

    this.srcLen = source.length();
    this.pos = 0;
    this.stopAt = this.srcLen;
    // Validates the starting position against the stop position just set and computes the initial "escaped" status.
    setStartPosition(startingPosition);
}
/**
 * Sets the position from where the source string will be processed from now on, taking into consideration the "escaped" status of current
 * character, if the mode {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the default search mode. Also clears the "inside a MySQL
 * block" status.
 *
 * @param pos
 *            the position from where the source string will be processed
 * @return
 *         the previous current position value
 * @throws IllegalArgumentException
 *             if {@code pos} is negative or higher than the current stop position
 */
public int setStartPosition(int pos) {
    if (pos < 0) {
        throw new IllegalArgumentException("The start position must be zero or a positive number.");
    }
    if (pos > this.stopAt) {
        throw new IllegalArgumentException("The start position must not be higher than the stop position.");
    }
    int prevPos = this.pos;
    this.pos = pos;
    // The escaped status depends on the characters that precede the new position, so it has to be recomputed.
    resetEscaped();
    this.inMysqlBlock = false;
    return prevPos;
}
/**
 * Sets the position where the source string processing will stop. The character at the stop position itself is not processed.
 *
 * @param pos
 *            the position where the source string will stop being processed
 * @return
 *         the previous stop position value
 * @throws IllegalArgumentException
 *             if {@code pos} is negative or higher than the length of the source string
 */
public int setStopPosition(int pos) {
    if (pos < 0) {
        throw new IllegalArgumentException("The stop position must be zero or a positive number.");
    }
    if (pos > this.srcLen) {
        throw new IllegalArgumentException("The stop position must not be higher than the length of the source string.");
    }
    int prevPos = this.stopAt;
    this.stopAt = pos;
    return prevPos;
}
/**
 * Takes a snapshot of the current state (position, stop position, escaped status and MySQL block status). Calling {@link #reset()} afterwards
 * brings the object back to this snapshot.
 */
public void mark() {
    this.markedInMysqlBlock = this.inMysqlBlock;
    this.markedEscape = this.escaped;
    this.markedStopAt = this.stopAt;
    this.markedPos = this.pos;
}
/**
 * Restores the state (position, stop position, escaped status and MySQL block status) saved by the most recent call to {@link #mark()}.
 */
public void reset() {
    this.inMysqlBlock = this.markedInMysqlBlock;
    this.escaped = this.markedEscape;
    this.stopAt = this.markedStopAt;
    this.pos = this.markedPos;
}
/**
 * Puts this object back in its initial processing state so that it can be reused from a fresh start: the position returns to the beginning of
 * the source string, the stop position to its end, and the escaped and MySQL block statuses are cleared. The state saved by {@link #mark()} is
 * left as is.
 */
public void restart() {
    this.inMysqlBlock = false;
    this.escaped = false;
    this.stopAt = this.srcLen;
    this.pos = 0;
}
/**
 * Returns the character in the current position.
 *
 * @return
 *         the character in the current position, or {@link Character#MIN_VALUE} if the current position is at or beyond the stop position
 */
public char getChar() {
    return this.pos < this.stopAt ? this.source.charAt(this.pos) : Character.MIN_VALUE;
}
/**
* Returns the current position, i.e., the index in the source string of the character that the next operation will process first.
*
* @return
* the current position
*/
public int getPosition() {
return this.pos;
}
/**
* Increments the current position index, by one, taking into consideration the "escaped" status of current character, if the mode
* {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the default search mode.
*
* @return
* the new current position, or -1 if the current position was already at or beyond the stop position
*/
public int incrementPosition() {
return incrementPosition(this.defaultSearchMode);
}
/**
 * Moves the current position one character forward and updates the "escaped" status accordingly, honoring backslash escapes only if
 * {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the search mode specified.
 *
 * @param searchMode
 *            the search mode to use in this operation
 * @return
 *         the new current position, or -1 if the current position was already at or beyond the stop position
 */
public int incrementPosition(Set searchMode) {
    if (this.pos >= this.stopAt) {
        return -1;
    }
    // A backslash toggles the escaped status (an escaped backslash does not escape what follows); any other character clears it.
    final boolean atBackslash = searchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE) && getChar() == '\\';
    this.escaped = atBackslash && !this.escaped;
    this.pos += 1;
    return this.pos;
}
/**
* Increments the current position index, by the given number, taking into consideration the "escaped" status of current character, if the mode
* {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the default search mode.
*
* @param by
* the number of positions to increment
* @return
* the new current position, or -1 if the stop position was reached before all the increments were done
*/
public int incrementPosition(int by) {
return incrementPosition(by, this.defaultSearchMode);
}
/**
 * Moves the current position forward by the given number of characters, one character at a time so that the "escaped" status is kept up to
 * date when {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the specified search mode.
 *
 * @param by
 *            the number of positions to increment
 * @param searchMode
 *            the search mode to use in this operation
 * @return
 *         the new current position, or -1 if the stop position was reached before all the increments were done
 */
public int incrementPosition(int by, Set searchMode) {
    for (int remaining = by; remaining > 0; remaining--) {
        if (incrementPosition(searchMode) == -1) {
            return -1;
        }
    }
    return this.pos;
}
/**
 * Recomputes the "escaped" status of the character in the current position by looking at the characters that precede it. The status is only
 * ever set when {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the default search mode.
 */
private void resetEscaped() {
    this.escaped = false;
    if (!this.defaultSearchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE)) {
        return;
    }
    // Each backslash in the run that immediately precedes the current position flips the status, so an odd-sized run means "escaped".
    int idx = this.pos - 1;
    while (idx >= 0 && this.source.charAt(idx) == '\\') {
        this.escaped = !this.escaped;
        idx--;
    }
}
/**
* Returns the position of the next valid character according to the default search mode. This method does not increment the current position
* automatically, i.e., if already positioned in a valid character then repeated calls return always the same index.
* If the character in the current position matches one of the prefixes that determine a skipping block, then the position marker advances to the first
* character after the block to skip.
*
* @return
* the position of the next valid character, or the current position if already on a valid character, or -1 if there is no valid character before
* the stop position
*/
public int indexOfNextChar() {
return indexOfNextChar(this.defaultSearchMode);
}
/**
* Returns the position of the next valid character using the given search mode instead of the default one. This method does not increment the current
* position automatically, i.e., if already positioned in a valid character then repeated calls return always the same index.
* If the character in the current position matches one of the prefixes that determine a skipping block, then the position marker advances to the first
* character after the block to skip.
*
* Depending on the flags present in the search mode, the text skipped is: text between opening and closing markers, block comments, line comments,
* hint blocks, the opening and closing markers of MySQL specific blocks (their contents are not skipped) and white space. Except for white space,
* nothing is skipped while the current character is escaped and {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is active.
*
* @param searchMode
* the search mode to use in this operation
* @return
* the position of the next valid character, or the current position if already on a valid character, or -1 if there is no valid character before
* the stop position
*/
private int indexOfNextChar(Set searchMode) {
// The constructor rejects a null source, so this check is purely defensive.
if (this.source == null) {
return -1;
}
if (this.pos >= this.stopAt) {
return -1;
}
// c1 and c2 are pre-loaded with the characters at pos and pos + 1 because each loop iteration starts by shifting c1 into c0 and c2 into c1.
char c0 = Character.MIN_VALUE; // Current char.
char c1 = this.source.charAt(this.pos); // Lookahead(1).
char c2 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE; // Lookahead(2).
for (; this.pos < this.stopAt; this.pos++) {
c0 = c1;
c1 = c2;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
boolean dashDashCommentImmediateEnd = false;
// Skipping blocks is only allowed if the current char is not escaped (or if escapes are not being honored at all).
boolean checkSkipConditions = !searchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE) || !this.escaped;
if (checkSkipConditions && searchMode.contains(SearchMode.SKIP_BETWEEN_MARKERS) && this.openingMarkers.indexOf(c0) != -1) {
// Opening marker found, skip until closing.
indexOfClosingMarker(searchMode);
if (this.pos >= this.stopAt) {
this.pos--; // Reached stop position. Correct position will be set by outer loop.
} else {
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
}
} else if (checkSkipConditions && searchMode.contains(SearchMode.SKIP_BLOCK_COMMENTS) && c0 == '/' && c1 == '*' && c2 != '!' && c2 != '+') {
// Comments block found, skip until end of block ("*/") (backslash escape doesn't work in comments).
// Does not include hint blocks ("/*!" or "/*+").
this.pos++; // Move to next char ('*').
while (++this.pos < this.stopAt && (this.source.charAt(this.pos) != '*'
|| (this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE) != '/')) {
// Continue
}
if (this.pos >= this.stopAt) {
this.pos--; // Reached stop position. Correct position will be set by outer loop.
} else {
this.pos++; // Move to next char ('/').
}
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
} else if (checkSkipConditions && searchMode.contains(SearchMode.SKIP_LINE_COMMENTS)
// The assignment embedded below records that the "--" was immediately followed by ';' so that case can be handled differently.
&& ((c0 == '-' && c1 == '-' && (Character.isWhitespace(c2) || (dashDashCommentImmediateEnd = c2 == ';') || c2 == Character.MIN_VALUE))
|| c0 == '#')) {
if (dashDashCommentImmediateEnd) {
// Comments line found but closed immediately by query delimiter marker.
this.pos++; // Move to next char ('-').
this.pos++; // Move to next char (';').
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
} else {
// Comments line found, skip until EOL (backslash escape doesn't work on comments).
while (++this.pos < this.stopAt && (c0 = this.source.charAt(this.pos)) != '\n' && c0 != '\r') {
// Continue.
}
if (this.pos >= this.stopAt) {
this.pos--; // Reached stop position. Correct position will be set by outer loop.
} else {
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
if (c0 == '\r' && c1 == '\n') {
// \r\n sequence found.
this.pos++; // Skip next char ('\n').
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
}
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
}
}
} else if (checkSkipConditions && searchMode.contains(SearchMode.SKIP_HINT_BLOCKS) && c0 == '/' && c1 == '*' && c2 == '+') {
// Hints block found, skip until end of block ("*/") (backslash escape doesn't work in hints).
this.pos++; // Move to next char ('*').
this.pos++; // Move to next char ('+').
while (++this.pos < this.stopAt && (this.source.charAt(this.pos) != '*'
|| (this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE) != '/')) {
// Continue
}
if (this.pos >= this.stopAt) {
this.pos--; // Reached stop position. Correct position will be set by outer loop.
} else {
this.pos++; // Move to next char ('/').
}
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
} else if (checkSkipConditions && searchMode.contains(SearchMode.SKIP_MYSQL_MARKERS) && c0 == '/' && c1 == '*' && c2 == '!') {
// MySQL specific block found, move to end of opening marker ("/*![12345]").
this.pos++; // Move to next char ('*').
this.pos++; // Move to next char ('!').
// Always true at this point: this branch is only entered when c2 == '!' and c2 has not been reassigned since.
if (c2 == '!') {
// Check if 5 digits MySQL version reference comes next, if so skip them.
int i = 0;
for (; i < NON_COMMENTS_MYSQL_VERSION_REF_LENGTH; i++) {
if (this.pos + 1 + i >= this.srcLen || !Character.isDigit(this.source.charAt(this.pos + 1 + i))) {
break;
}
}
if (i == NON_COMMENTS_MYSQL_VERSION_REF_LENGTH) {
this.pos += NON_COMMENTS_MYSQL_VERSION_REF_LENGTH;
if (this.pos >= this.stopAt) {
this.pos = this.stopAt - 1; // Reached stop position. Correct position will be set by outer loop.
}
}
}
// Reset lookahead.
c1 = this.pos + 1 < this.srcLen ? this.source.charAt(this.pos + 1) : Character.MIN_VALUE;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
// Remember that a MySQL block was opened so that its closing marker can be recognized and skipped later.
this.inMysqlBlock = true;
} else if (this.inMysqlBlock && checkSkipConditions && searchMode.contains(SearchMode.SKIP_MYSQL_MARKERS) && c0 == '*' && c1 == '/') {
// MySQL block closing marker ("*/") found.
this.pos++; // move to next char ('/')
// Reset lookahead.
c1 = c2;
c2 = this.pos + 2 < this.srcLen ? this.source.charAt(this.pos + 2) : Character.MIN_VALUE;
this.inMysqlBlock = false;
} else if (!searchMode.contains(SearchMode.SKIP_WHITE_SPACE) || !Character.isWhitespace(c0)) {
// Whitespace is not affected by backslash escapes.
// Nothing to skip at the current position, so this is the valid character that was being looked for.
return this.pos;
}
// Reaching here means that the position has incremented thus an 'escaped' status no longer holds.
this.escaped = false;
}
// The stop position was reached without finding a valid character.
return -1;
}
/**
* Moves the current position to the closing marker that corresponds to the opening marker in the current position, and returns it.
* Nested occurrences of the same pair of markers are taken into account when the opening and closing markers differ. Unless the outer marker is
* itself an overriding marker, blocks delimited by overriding markers found on the way are consumed as a whole. If
* {@link SearchMode#ALLOW_BACKSLASH_ESCAPE} is present in the search mode then the character following a backslash is skipped.
* If the current position is not an opening marker, then the position is not moved and the current position is returned.
*
* @param searchMode
* the search mode to use in this operation
* @return
* the position of the closing marker corresponding to the opening marker in the current position; a position at or beyond the stop position if
* no closing marker was found; the unchanged current position if it is not on an opening marker; or -1 if the current position was already at
* or beyond the stop position
*/
private int indexOfClosingMarker(Set searchMode) {
// The constructor rejects a null source, so this check is purely defensive.
if (this.source == null) {
return -1;
}
if (this.pos >= this.stopAt) {
return -1;
}
char c0 = this.source.charAt(this.pos); // Current char, also the opening marker.
int markerIndex = this.openingMarkers.indexOf(c0);
if (markerIndex == -1) {
// Not at an opening marker.
return this.pos;
}
int nestedMarkersCount = 0;
char openingMarker = c0;
// Opening and closing markers are paired by their index in the respective strings.
char closingMarker = this.closingMarkers.charAt(markerIndex);
boolean outerIsAnOverridingMarker = this.overridingMarkers.indexOf(openingMarker) != -1;
// Advance until the closing marker is found at nesting level zero, or until the stop position is reached.
while (++this.pos < this.stopAt && ((c0 = this.source.charAt(this.pos)) != closingMarker || nestedMarkersCount != 0)) {
if (!outerIsAnOverridingMarker && this.overridingMarkers.indexOf(c0) != -1) {
// There is an overriding marker that needs to be consumed before returning to the previous marker.
int overridingMarkerIndex = this.openingMarkers.indexOf(c0); // OverridingMarkers must be a sub-list of openingMarkers.
int overridingNestedMarkersCount = 0;
char overridingOpeningMarker = c0;
char overridingClosingMarker = this.closingMarkers.charAt(overridingMarkerIndex);
while (++this.pos < this.stopAt && ((c0 = this.source.charAt(this.pos)) != overridingClosingMarker || overridingNestedMarkersCount != 0)) {
// Do as in the outer loop, except that this marker cannot be overridden.
if (c0 == overridingOpeningMarker) {
overridingNestedMarkersCount++;
} else if (c0 == overridingClosingMarker) {
overridingNestedMarkersCount--;
} else if (searchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE) && c0 == '\\') {
this.pos++; // Next char is escaped, skip it.
}
}
if (this.pos >= this.stopAt) {
this.pos--; // Reached stop position. Correct position will be set by outer loop.
}
} else if (c0 == openingMarker) {
nestedMarkersCount++;
} else if (c0 == closingMarker) {
nestedMarkersCount--;
} else if (searchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE) && c0 == '\\') {
this.pos++; // Next char is escaped, skip it.
}
}
return this.pos;
}
/**
 * Finds the next alphanumeric character, always skipping white space even if the default search mode does not ask for it. The current position
 * is not incremented automatically, i.e., if already positioned in an alphanumeric character then repeated calls return always the same index.
 * Blocks that the default search mode determines as skippable are jumped over on the way.
 *
 * @return
 *         the position of the next alphanumeric character, or the current position if already on one, or -1 if there is none before the stop
 *         position
 */
public int indexOfNextAlphanumericChar() {
    if (this.source == null || this.pos >= this.stopAt) {
        return -1;
    }

    // Work on a copy of the default search mode if white space skipping has to be added to it.
    Set mode = this.defaultSearchMode;
    if (!mode.contains(SearchMode.SKIP_WHITE_SPACE)) {
        mode = EnumSet.copyOf(this.defaultSearchMode);
        mode.add(SearchMode.SKIP_WHITE_SPACE);
    }

    // The guard above ensures that the position is before the stop position on the first pass.
    do {
        int before = this.pos;
        if (indexOfNextChar(mode) == -1) {
            return -1;
        }
        if (Character.isLetterOrDigit(this.source.charAt(this.pos))) {
            return this.pos;
        }
        if (before == this.pos) {
            // Standing on a valid but non-alphanumeric character: step over it.
            incrementPosition(mode);
        }
    } while (this.pos < this.stopAt);
    return -1;
}
/**
 * Finds the next non-whitespace character, always skipping white space even if the default search mode does not ask for it. The current
 * position is not incremented automatically, i.e., if already positioned in a valid character then repeated calls return always the same index.
 * Blocks that the default search mode determines as skippable are jumped over on the way.
 *
 * @return
 *         the position of the next non-whitespace character, or the current position if already on one, or -1 if there is none before the stop
 *         position
 */
public int indexOfNextNonWsChar() {
    if (this.source == null || this.pos >= this.stopAt) {
        return -1;
    }
    if (this.defaultSearchMode.contains(SearchMode.SKIP_WHITE_SPACE)) {
        // The default search mode already does what is needed.
        return indexOfNextChar(this.defaultSearchMode);
    }
    Set skippingWhiteSpace = EnumSet.copyOf(this.defaultSearchMode);
    skippingWhiteSpace.add(SearchMode.SKIP_WHITE_SPACE);
    return indexOfNextChar(skippingWhiteSpace);
}
/**
 * Finds the next whitespace character, never skipping white space even if the default search mode asks for it. The current position is not
 * incremented automatically, i.e., if already positioned in a whitespace character then repeated calls return always the same index.
 * Blocks that the default search mode determines as skippable are jumped over on the way.
 *
 * @return
 *         the position of the next whitespace character, or the current position if already on one, or -1 if there is none before the stop
 *         position
 */
public int indexOfNextWsChar() {
    if (this.source == null || this.pos >= this.stopAt) {
        return -1;
    }

    // Work on a copy of the default search mode if white space skipping has to be removed from it.
    Set mode = this.defaultSearchMode;
    if (mode.contains(SearchMode.SKIP_WHITE_SPACE)) {
        mode = EnumSet.copyOf(this.defaultSearchMode);
        mode.remove(SearchMode.SKIP_WHITE_SPACE);
    }

    // The guard above ensures that the position is before the stop position on the first pass.
    do {
        int before = this.pos;
        if (indexOfNextChar(mode) == -1) {
            return -1;
        }
        if (Character.isWhitespace(this.source.charAt(this.pos))) {
            return this.pos;
        }
        if (before == this.pos) {
            // Standing on a valid but non-whitespace character: step over it.
            incrementPosition(mode);
        }
    } while (this.pos < this.stopAt);
    return -1;
}
/**
* Finds the position of the given string within the source string, ignoring case, using the default search mode to decide whether text delimited by
* the specified markers or inside comment blocks is skipped. On success the current position is left at the beginning of the sub-string found.
*
* @param searchFor
* the sub-string to search for
* @return
* the position where the sub-string is found, starting from the current position, or -1 if not found or if the sub-string is null or empty
*/
public int indexOfIgnoreCase(String searchFor) {
return indexOfIgnoreCase(searchFor, this.defaultSearchMode);
}
/**
 * Finds the position of the given string within the source string, ignoring case, with the option to skip text delimited by the specified
 * markers or inside comment blocks. On success the current position is left at the beginning of the sub-string found.
 *
 * If the sub-string starts with a white space character then {@link SearchMode#SKIP_WHITE_SPACE} is ignored for this search, otherwise the
 * sub-string could never be found.
 *
 * @param searchFor
 *            the sub-string to search for
 * @param searchMode
 *            the search mode to use in this operation
 * @return
 *         the position where the sub-string is found, starting from the current position, or -1 if not found or if the sub-string is null or
 *         empty
 */
public int indexOfIgnoreCase(String searchFor, Set<SearchMode> searchMode) {
    if (searchFor == null) {
        return -1;
    }

    int searchForLength = searchFor.length();
    // No match can start where there aren't enough characters left to hold the whole sub-string, nor at or after the stop position.
    int localStopAt = Math.min(this.srcLen - searchForLength + 1, this.stopAt);
    if (this.pos >= localStopAt || searchForLength == 0) {
        return -1;
    }

    // Some locales don't follow upper-case rule, so need to check both.
    char firstCharOfSearchForUc = Character.toUpperCase(searchFor.charAt(0));
    char firstCharOfSearchForLc = Character.toLowerCase(searchFor.charAt(0));

    // The adjustment below must be based on the search mode given for this operation, not on the default one, otherwise the caller's
    // choice would be silently replaced by the default search mode.
    Set<SearchMode> localSearchMode = searchMode;
    if (Character.isWhitespace(firstCharOfSearchForLc) && searchMode.contains(SearchMode.SKIP_WHITE_SPACE)) {
        // Can't skip white spaces if first searchFor char is one.
        localSearchMode = EnumSet.copyOf(searchMode);
        localSearchMode.remove(SearchMode.SKIP_WHITE_SPACE);
    }

    while (this.pos < localStopAt) {
        if (indexOfNextChar(localSearchMode) == -1) {
            return -1;
        }
        // Cheap first character comparison before the full region comparison.
        if (StringUtils.isCharEqualIgnoreCase(getChar(), firstCharOfSearchForUc, firstCharOfSearchForLc)
                && StringUtils.regionMatchesIgnoreCase(this.source, this.pos, searchFor)) {
            return this.pos;
        }
        incrementPosition(localSearchMode);
    }
    return -1;
}
/**
 * Finds the position of the first of a consecutive sequence of strings within the source string, ignoring case, with the option to skip text
 * delimited by the specified markers or inside comment blocks. Consecutive sub-strings must be separated by at least one skippable character.
 *
 * The first sub-string is searched with the default search mode. Independently of it, when searching for the second and following sub-strings
 * {@link SearchMode#SKIP_WHITE_SPACE} will be added and {@link SearchMode#SKIP_BETWEEN_MARKERS} removed.
 *
 * On success the current position is left at the beginning of the first sub-string.
 *
 * @param searchFor
 *            the sequence of sub-strings to search
 * @return
 *         the position where the first sub-string is found, starting from the current position, or -1 if not found, if no sub-strings are
 *         given, if any of them is null or if the first one is empty
 */
public int indexOfIgnoreCase(String... searchFor) {
    if (searchFor == null || searchFor.length == 0) {
        return -1;
    }

    int searchForLength = 0;
    for (String searchForPart : searchFor) {
        if (searchForPart == null) {
            return -1; // Same outcome as searching for a single null sub-string.
        }
        searchForLength += searchForPart.length();
    } // Minimum length for searchFor (without gaps between words).

    if (searchForLength == 0 || searchFor[0].isEmpty()) {
        // Nothing to search for, or no first word to anchor the search on (an empty single sub-string is never found either).
        return -1;
    }

    int searchForWordsCount = searchFor.length;
    searchForLength += searchForWordsCount - 1; // Count gaps between words.
    int localStopAt = Math.min(this.srcLen - searchForLength + 1, this.stopAt);
    if (this.pos >= localStopAt) {
        return -1;
    }

    Set<SearchMode> searchMode1 = this.defaultSearchMode;
    if (Character.isWhitespace(searchFor[0].charAt(0)) && this.defaultSearchMode.contains(SearchMode.SKIP_WHITE_SPACE)) {
        // Cannot skip white spaces if first searchFor char is one.
        searchMode1 = EnumSet.copyOf(this.defaultSearchMode);
        searchMode1.remove(SearchMode.SKIP_WHITE_SPACE);
    }

    // searchMode used to search 2nd and following words cannot contain SearchMode.SKIP_BETWEEN_MARKERS and must contain SearchMode.SKIP_WHITE_SPACE.
    // Built with noneOf() + addAll() because EnumSet.copyOf() rejects an empty collection that is not an EnumSet.
    Set<SearchMode> searchMode2 = EnumSet.noneOf(SearchMode.class);
    searchMode2.addAll(this.defaultSearchMode);
    searchMode2.add(SearchMode.SKIP_WHITE_SPACE);
    searchMode2.remove(SearchMode.SKIP_BETWEEN_MARKERS);

    while (this.pos < localStopAt) {
        int positionOfFirstWord = indexOfIgnoreCase(searchFor[0], searchMode1);
        if (positionOfFirstWord == -1 || positionOfFirstWord >= localStopAt) {
            return -1;
        }

        // Remember where the first word starts so that the position can be restored there if the whole sequence matches.
        mark();
        int startingPositionForNextWord = incrementPosition(searchFor[0].length(), searchMode2);
        int wc = 0;
        boolean match = true;
        while (++wc < searchForWordsCount && match) {
            if (indexOfNextChar(searchMode2) == -1 || startingPositionForNextWord == this.pos
                    || !StringUtils.regionMatchesIgnoreCase(this.source, this.pos, searchFor[wc])) {
                // Either there are no more chars to search, there is no gap between words or match failed.
                match = false;
            } else {
                startingPositionForNextWord = incrementPosition(searchFor[wc].length(), searchMode2);
            }
        }

        if (match) {
            reset();
            return positionOfFirstWord;
        }
        // No match: the search for the first word resumes from where the failed attempt stopped.
    }
    return -1;
}
/**
 * Checks if the source string, counting from the current position, starts with the given string, ignoring case. The current position is not
 * changed.
 *
 * @param toMatch
 *            the sub-string to match
 * @return
 *         the position where the sub-string match ended, or -1 if not matched or if the sub-string is null or empty
 */
public int matchesIgnoreCase(String toMatch) {
    if (toMatch == null) {
        return -1;
    }

    final int length = toMatch.length();
    // A match cannot start at or after the stop position, nor where there aren't enough characters left to hold the whole sub-string.
    final int lastStart = Math.min(this.stopAt, this.srcLen - length + 1);
    if (length == 0 || this.pos >= lastStart) {
        return -1;
    }

    // Some locales don't follow upper-case rule, so need to check both.
    final char firstUc = Character.toUpperCase(toMatch.charAt(0));
    final char firstLc = Character.toLowerCase(toMatch.charAt(0));

    // Cheap first character comparison before the full region comparison.
    if (!StringUtils.isCharEqualIgnoreCase(getChar(), firstUc, firstLc)) {
        return -1;
    }
    if (!StringUtils.regionMatchesIgnoreCase(this.source, this.pos, toMatch)) {
        return -1;
    }
    return this.pos + length;
}
/**
 * Checks if the given consecutive sequence of strings matches the source string counting from the current position, ignoring case. Consecutive
 * sub-strings must be separated by at least one skippable character.
 *
 * Independently of the default search mode, when moving to the second and following sub-strings {@link SearchMode#SKIP_WHITE_SPACE} will be
 * added and {@link SearchMode#SKIP_BETWEEN_MARKERS} removed.
 *
 * The current position is the same after this method returns as it was before.
 *
 * @param toMatch
 *            the sequence of sub-strings to match
 * @return
 *         the position where the sequence of sub-strings match ended, or -1 if not matched, if no sub-strings are given or if any of them is
 *         null or empty
 */
public int matchesIgnoreCase(String... toMatch) {
    if (toMatch == null) {
        return -1;
    }

    int toMatchLength = 0;
    for (String toMatchPart : toMatch) {
        if (toMatchPart == null) {
            return -1; // Same outcome as matching a single null sub-string.
        }
        toMatchLength += toMatchPart.length();
    } // Minimum length for toMatch (without gaps between words).

    if (toMatchLength == 0) {
        return -1;
    }

    int toMatchWordsCount = toMatch.length;
    toMatchLength += toMatchWordsCount - 1; // Count gaps between words.
    int localStopAt = Math.min(this.srcLen - toMatchLength + 1, this.stopAt);
    if (this.pos >= localStopAt) {
        return -1;
    }

    // searchMode used to match 2nd and following words cannot contain SearchMode.SKIP_BETWEEN_MARKERS and must contain SearchMode.SKIP_WHITE_SPACE.
    // Built with noneOf() + addAll() because EnumSet.copyOf() rejects an empty collection that is not an EnumSet.
    Set<SearchMode> searchMode2 = EnumSet.noneOf(SearchMode.class);
    searchMode2.addAll(this.defaultSearchMode);
    searchMode2.add(SearchMode.SKIP_WHITE_SPACE);
    searchMode2.remove(SearchMode.SKIP_BETWEEN_MARKERS);

    // The state is restored on every exit path below, so matching never moves the current position.
    mark();
    int endOfMatch = -1;
    int startingPositionForNextWord = -1;
    for (String searchForPart : toMatch) {
        if (getPosition() == startingPositionForNextWord) {
            // There is no gap between words.
            reset();
            return -1;
        }
        endOfMatch = matchesIgnoreCase(searchForPart);
        if (endOfMatch == -1) {
            reset();
            return -1;
        }
        // Step over the matched word, then over whatever can be skipped before the next one.
        startingPositionForNextWord = incrementPosition(searchForPart.length(), searchMode2);
        indexOfNextChar(searchMode2);
    }
    reset();
    return endOfMatch;
}
/**
 * Returns a copy of the source string stripped of all comments and hints. Text between the markers given to the constructor is copied
 * untouched, even if it contains comments-like text. If no complete set of markers was given then no text is treated as quoted.
 *
 * This operation restarts this object, i.e., the whole source string is processed and the previous position and stop position are lost.
 *
 * @return
 *         a comments-free string
 */
public String stripCommentsAndHints() {
    restart();

    Set<SearchMode> searchMode = EnumSet.of(SearchMode.SKIP_BLOCK_COMMENTS, SearchMode.SKIP_LINE_COMMENTS, SearchMode.SKIP_HINT_BLOCKS);
    if (this.defaultSearchMode.contains(SearchMode.ALLOW_BACKSLASH_ESCAPE)) {
        searchMode.add(SearchMode.ALLOW_BACKSLASH_ESCAPE);
    }

    // The constructor only requires the markers to be non-null when SKIP_BETWEEN_MARKERS is in the default search mode, so they may be
    // missing here. Handling markers needs all three strings.
    boolean markersAvailable = this.openingMarkers != null && this.closingMarkers != null && this.overridingMarkers != null;

    StringBuilder noCommsStr = new StringBuilder(this.source.length());
    while (this.pos < this.stopAt) {
        int prevPos = this.pos;
        if (indexOfNextChar(searchMode) == -1) {
            return noCommsStr.toString();
        }

        if (markersAvailable && !this.escaped && this.openingMarkers.indexOf(getChar()) != -1) {
            // Characters between markers must be taken all at once otherwise indexOfNextChar() would skip them if they contain comments-like text.
            int idxOpMrkr = this.pos;
            if (indexOfClosingMarker(searchMode) < this.srcLen) {
                incrementPosition(searchMode); // Include the closing marker.
            }
            noCommsStr.append(this.source, idxOpMrkr, this.pos);
        } else {
            if (this.pos - prevPos > 1) { // Non consecutive characters, there was a comment in between. Add a space if needed.
                if (prevPos > 0 && !Character.isWhitespace(this.source.charAt(prevPos - 1)) && !Character.isWhitespace(this.source.charAt(this.pos))) {
                    noCommsStr.append(" ");
                }
            }
            noCommsStr.append(getChar());
            incrementPosition(searchMode);
        }
    }
    return noCommsStr.toString();
}
/**
 * Splits the source string by the given delimiter, which is searched ignoring case and using the default search mode. Consecutive delimiters
 * result in empty string parts. A null delimiter is rejected; an empty delimiter is never found, so the result is the whole source string as a
 * single part.
 *
 * This operation restarts this object, i.e., the whole source string is processed and the previous position and stop position are lost.
 *
 * @param delimiter
 *            the characters sequence where to split the source string
 * @param trim
 *            whether each one of the parts should be trimmed or not
 * @return
 *         a {@link List} containing all the string parts
 * @throws IllegalArgumentException
 *             if {@code delimiter} is null
 */
public List<String> split(String delimiter, boolean trim) {
    if (delimiter == null) {
        throw new IllegalArgumentException("The delimiter string must not be null.");
    }

    restart();

    int startPos = 0;
    List<String> splitParts = new ArrayList<>();
    // A successful search leaves the current position at the beginning of the delimiter, so there is no need to search for it twice.
    while (indexOfIgnoreCase(delimiter) != -1) {
        String part = this.source.substring(startPos, this.pos);
        splitParts.add(trim ? part.trim() : part);
        // The next part starts right after the delimiter just found.
        startPos = incrementPosition(delimiter.length());
    }

    // Add last part.
    String token = this.source.substring(startPos);
    splitParts.add(trim ? token.trim() : token);
    return splitParts;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy