org.eclipse.jface.text.FindReplaceDocumentAdapter Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of aspectjtools Show documentation
AspectJ tools most notably contains the AspectJ compiler (AJC). AJC applies aspects to Java classes during compilation, fully replacing Javac for plain Java classes and also compiling native AspectJ or annotation-based @AspectJ syntax. Furthermore, AJC can weave aspects into existing class files in a post-compile binary weaving step. This library is a superset of AspectJ weaver and hence also of AspectJ runtime.
There is a newer version: 1.9.22.1
Show newest version
/*******************************************************************************
 * Copyright (c) 2000, 2012 IBM Corporation and others.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Cagatay Calli  - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949
 *     Cagatay Calli  - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061
 *******************************************************************************/
package org.eclipse.jface.text;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.eclipse.core.runtime.Assert;


/**
 * Provides search and replace operations on
 * {@link org.eclipse.jface.text.IDocument}.
 * 
 * Replaces
 * {@link org.eclipse.jface.text.IDocument#search(int, String, boolean, boolean, boolean)}.
 *
 * @since 3.0
 */
public class FindReplaceDocumentAdapter implements CharSequence {

	/**
	 * Internal type for operation codes.
	 */
	private static class FindReplaceOperationCode {
	}

	// Find/replace operation codes.
	private static final FindReplaceOperationCode FIND_FIRST= new FindReplaceOperationCode();
	private static final FindReplaceOperationCode FIND_NEXT= new FindReplaceOperationCode();
	private static final FindReplaceOperationCode REPLACE= new FindReplaceOperationCode();
	private static final FindReplaceOperationCode REPLACE_FIND_NEXT= new FindReplaceOperationCode();

	/**
	 * Retain case mode constants.
	 * @since 3.4
	 */
	private static final int RC_MIXED= 0;
	private static final int RC_UPPER= 1;
	private static final int RC_LOWER= 2;
	private static final int RC_FIRSTUPPER= 3;


	/**
	 * The adapted document.
	 */
	private IDocument fDocument;

	/**
	 * State for findReplace.
	 */
	private FindReplaceOperationCode fFindReplaceState= null;

	/**
	 * The matcher used in findReplace.
	 */
	private Matcher fFindReplaceMatcher;

	/**
	 * The match offset from the last findReplace call.
	 */
	private int fFindReplaceMatchOffset;

	/**
	 * Retain case mode
	 */
	private int fRetainCaseMode;

	/**
	 * Constructs a new find replace document adapter.
	 *
	 * @param document the adapted document
	 */
	public FindReplaceDocumentAdapter(IDocument document) {
		Assert.isNotNull(document);
		fDocument= document;
	}

	/**
	 * Returns the location of a given string in this adapter's document based on a set of search criteria.
	 *
	 * @param startOffset document offset at which search starts
	 * @param findString the string to find
	 * @param forwardSearch the search direction
	 * @param caseSensitive indicates whether lower and upper case should be distinguished
	 * @param wholeWord indicates whether the findString should be limited by white spaces as
	 * 			defined by Character.isWhiteSpace. Must not be used in combination with regExSearch.
	 * @param regExSearch if true findString represents a regular expression
	 * 			Must not be used in combination with wholeWord.
	 * @return the find or replace region or null if there was no match
	 * @throws BadLocationException if startOffset is an invalid document offset
	 * @throws PatternSyntaxException if a regular expression has invalid syntax
	 */
	public IRegion find(int startOffset, String findString, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException {
		Assert.isTrue(!(regExSearch && wholeWord));

		// Adjust offset to special meaning of -1
		if (startOffset == -1 && forwardSearch)
			startOffset= 0;
		if (startOffset == -1 && !forwardSearch)
			startOffset= length() - 1;

		return findReplace(FIND_FIRST, startOffset, findString, null, forwardSearch, caseSensitive, wholeWord, regExSearch);
	}

	/**
	 * Stateful findReplace executes a FIND, REPLACE, REPLACE_FIND or FIND_FIRST operation.
	 * In case of REPLACE and REPLACE_FIND it sends a DocumentEvent to all
	 * registered IDocumentListener.
	 *
	 * @param startOffset document offset at which search starts
	 * 			this value is only used in the FIND_FIRST operation and otherwise ignored
	 * @param findString the string to find
	 * 			this value is only used in the FIND_FIRST operation and otherwise ignored
	 * @param replaceText the string to replace the current match
	 * 			this value is only used in the REPLACE and REPLACE_FIND operations and otherwise ignored
	 * @param forwardSearch the search direction
	 * @param caseSensitive indicates whether lower and upper case should be distinguished
	 * @param wholeWord indicates whether the findString should be limited by white spaces as
	 * 			defined by Character.isWhiteSpace. Must not be used in combination with regExSearch.
	 * @param regExSearch if true this operation represents a regular expression
	 * 			Must not be used in combination with wholeWord.
	 * @param operationCode specifies what kind of operation is executed
	 * @return the find or replace region or null if there was no match
	 * @throws BadLocationException if startOffset is an invalid document offset
	 * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation
	 * @throws PatternSyntaxException if a regular expression has invalid syntax
	 */
	private IRegion findReplace(final FindReplaceOperationCode operationCode, int startOffset, String findString, String replaceText, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException {

		// Validate option combinations
		Assert.isTrue(!(regExSearch && wholeWord));

		// Validate state
		if ((operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) && (fFindReplaceState != FIND_FIRST && fFindReplaceState != FIND_NEXT))
			throw new IllegalStateException("illegal findReplace state: cannot replace without preceding find"); //$NON-NLS-1$

		if (operationCode == FIND_FIRST) {
			// Reset

			if (findString == null || findString.isEmpty())
				return null;

			// Validate start offset
			if (startOffset < 0 || startOffset > length())
				throw new BadLocationException();

			int patternFlags= 0;

			if (regExSearch) {
				patternFlags |= Pattern.MULTILINE;
				findString= substituteLinebreak(findString);
			}

			if (!caseSensitive)
				patternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

			if (!regExSearch)
				findString= asRegPattern(findString);

			if (wholeWord)
				findString= "\\b" + findString + "\\b"; //$NON-NLS-1$ //$NON-NLS-2$

			fFindReplaceMatchOffset= startOffset;
			if (fFindReplaceMatcher != null && fFindReplaceMatcher.pattern().pattern().equals(findString) && fFindReplaceMatcher.pattern().flags() == patternFlags) {
				/*
				 * Commented out for optimization:
				 * The call is not needed since FIND_FIRST uses find(int) which resets the matcher
				 */
				// fFindReplaceMatcher.reset();
			} else {
				Pattern pattern= Pattern.compile(findString, patternFlags);
				fFindReplaceMatcher= pattern.matcher(this);
			}
		}

		// Set state
		fFindReplaceState= operationCode;

		if (operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) {
			if (regExSearch) {
				Pattern pattern= fFindReplaceMatcher.pattern();
				String prevMatch= fFindReplaceMatcher.group();
				try {
					replaceText= interpretReplaceEscapes(replaceText, prevMatch);
					Matcher replaceTextMatcher= pattern.matcher(prevMatch);
					replaceText= replaceTextMatcher.replaceFirst(replaceText);
				} catch (IndexOutOfBoundsException ex) {
					throw new PatternSyntaxException(ex.getLocalizedMessage(), replaceText, -1);
				}
			}

			int offset= fFindReplaceMatcher.start();
			int length= fFindReplaceMatcher.group().length();

			if (fDocument instanceof IRepairableDocumentExtension
					&& ((IRepairableDocumentExtension)fDocument).isLineInformationRepairNeeded(offset, length, replaceText)) {
				String message= TextMessages.getString("FindReplaceDocumentAdapter.incompatibleLineDelimiter"); //$NON-NLS-1$
				throw new PatternSyntaxException(message, replaceText, offset);
			}

			fDocument.replace(offset, length, replaceText);

			if (operationCode == REPLACE) {
				return new Region(offset, replaceText.length());
			}
		}

		if (operationCode != REPLACE) {
			try {
				if (forwardSearch) {

					boolean found= false;
					if (operationCode == FIND_FIRST)
						found= fFindReplaceMatcher.find(startOffset);
					else
						found= fFindReplaceMatcher.find();

					if (operationCode == REPLACE_FIND_NEXT)
						fFindReplaceState= FIND_NEXT;

					if (found && !fFindReplaceMatcher.group().isEmpty())
						return new Region(fFindReplaceMatcher.start(), fFindReplaceMatcher.group().length());
					return null;
				}
				// backward search
				boolean found= fFindReplaceMatcher.find(0);
				int index= -1;
				int length= -1;
				while (found && fFindReplaceMatcher.start() + fFindReplaceMatcher.group().length() <= fFindReplaceMatchOffset + 1) {
					index= fFindReplaceMatcher.start();
					length= fFindReplaceMatcher.group().length();
					found= fFindReplaceMatcher.find(index + 1);
				}
				fFindReplaceMatchOffset= index;
				if (index > -1) {
					// must set matcher to correct position
					fFindReplaceMatcher.find(index);
					return new Region(index, length);
				}
				return null;
			} catch (StackOverflowError e) {
				String message= TextMessages.getString("FindReplaceDocumentAdapter.patternTooComplex"); //$NON-NLS-1$
				throw new PatternSyntaxException(message, findString, -1);
			}
		}

		return null;
	}

	/**
	 * Substitutes \R in a regex find pattern with {@code (?>\r\n?|\n)}
	 *
	 * @param findString the original find pattern
	 * @return the transformed find pattern
	 * @throws PatternSyntaxException if \R is added at an illegal position (e.g. in a character set)
	 * @since 3.4
	 */
	private String substituteLinebreak(String findString) throws PatternSyntaxException {
		int length= findString.length();
		StringBuilder buf= new StringBuilder(length);

		int inCharGroup= 0;
		int inBraces= 0;
		boolean inQuote= false;
		for (int i= 0; i < length; i++) {
			char ch= findString.charAt(i);
			switch (ch) {
				case '[':
					buf.append(ch);
					if (! inQuote)
						inCharGroup++;
					break;

				case ']':
					buf.append(ch);
					if (! inQuote)
						inCharGroup--;
					break;

				case '{':
					buf.append(ch);
					if (! inQuote && inCharGroup == 0)
						inBraces++;
					break;

				case '}':
					buf.append(ch);
					if (! inQuote && inCharGroup == 0)
						inBraces--;
					break;

				case '\\':
					if (i + 1 < length) {
						char ch1= findString.charAt(i + 1);
						if (inQuote) {
							if (ch1 == 'E')
								inQuote= false;
							buf.append(ch).append(ch1);
							i++;

						} else if (ch1 == 'R') {
							if (inCharGroup > 0 || inBraces > 0) {
								String msg= TextMessages.getString("FindReplaceDocumentAdapter.illegalLinebreak"); //$NON-NLS-1$
								throw new PatternSyntaxException(msg, findString, i);
							}
							buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
							i++;

						} else {
							if (ch1 == 'Q') {
								inQuote= true;
							}
							buf.append(ch).append(ch1);
							i++;
						}
					} else {
						buf.append(ch);
					}
					break;

				default:
					buf.append(ch);
					break;
			}

		}
		return buf.toString();
	}

	/**
	 * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed)
	 * and appends the character ch to buf after processing.
	 *
	 * @param buf the output buffer
	 * @param ch the character to process
	 * @since 3.4
	 */
	private void interpretRetainCase(StringBuilder buf, char ch) {
		if (fRetainCaseMode == RC_UPPER)
			buf.append(String.valueOf(ch).toUpperCase());
		else if (fRetainCaseMode == RC_LOWER)
			buf.append(String.valueOf(ch).toLowerCase());
		else if (fRetainCaseMode == RC_FIRSTUPPER) {
			buf.append(String.valueOf(ch).toUpperCase());
			fRetainCaseMode= RC_MIXED;
		} else
			buf.append(ch);
	}

	/**
	 * Interprets escaped characters in the given replace pattern.
	 *
	 * @param replaceText the replace pattern
	 * @param foundText the found pattern to be replaced
	 * @return a replace pattern with escaped characters substituted by the respective characters
	 * @since 3.4
	 */
	private String interpretReplaceEscapes(String replaceText, String foundText) {
		int length= replaceText.length();
		boolean inEscape= false;
		StringBuilder buf= new StringBuilder(length);

		/* every string we did not check looks mixed at first
		 * so initialize retain case mode with RC_MIXED
		 */
		fRetainCaseMode= RC_MIXED;

		for (int i= 0; i < length; i++) {
			final char ch= replaceText.charAt(i);
			if (inEscape) {
				i= interpretReplaceEscape(ch, i, buf, replaceText, foundText);
				inEscape= false;

			} else if (ch == '\\') {
				inEscape= true;

			} else if (ch == '$') {
				buf.append(ch);

				/*
				 * Feature in java.util.regex.Matcher#replaceFirst(String):
				 * $00, $000, etc. are interpreted as $0 and
				 * $01, $001, etc. are interpreted as $1, etc. .
				 * If we support \0 as replacement pattern for capturing group 0,
				 * it would not be possible any more to write a replacement pattern
				 * that appends 0 to a capturing group (like $0\0).
				 * The fix is to interpret \00 and $00 as $0\0, and
				 * \01 and $01 as $0\1, etc.
				 */
				if (i + 2 < length) {
					char ch1= replaceText.charAt(i + 1);
					char ch2= replaceText.charAt(i + 2);
					if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') {
						buf.append("0\\"); //$NON-NLS-1$
						i++; // consume the 0
					}
				}
			} else {
				interpretRetainCase(buf, ch);
			}
		}

		if (inEscape) {
			// '\' as last character is invalid, but we still add it to get an error message
			buf.append('\\');
		}
		return buf.toString();
	}

	/**
	 * Interprets the escaped character ch at offset i
	 * of the replaceText and appends the interpretation to buf.
	 *
	 * @param ch the escaped character
	 * @param i the offset
	 * @param buf the output buffer
	 * @param replaceText the original replace pattern
	 * @param foundText the found pattern to be replaced
	 * @return the new offset
	 * @since 3.4
	 */
	private int interpretReplaceEscape(final char ch, int i, StringBuilder buf, String replaceText, String foundText) {
		int length= replaceText.length();
		switch (ch) {
			case 'r':
				buf.append('\r');
				break;
			case 'n':
				buf.append('\n');
				break;
			case 't':
				buf.append('\t');
				break;
			case 'f':
				buf.append('\f');
				break;
			case 'a':
				buf.append('\u0007');
				break;
			case 'e':
				buf.append('\u001B');
				break;
			case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
				buf.append(TextUtilities.getDefaultLineDelimiter(fDocument));
				break;
			/*
			 * \0 for octal is not supported in replace string, since it
			 * would conflict with capturing group \0, etc.
			 */
			case '0':
				buf.append('$').append(ch);
				/*
				 * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)"
				 * in interpretReplaceEscape(String) above.
				 */
				if (i + 1 < length) {
					char ch1= replaceText.charAt(i + 1);
					if ('0' <= ch1 && ch1 <= '9') {
						buf.append('\\');
					}
				}
				break;

			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				buf.append('$').append(ch);
				break;

			case 'c':
				if (i + 1 < length) {
					char ch1= replaceText.charAt(i + 1);
					interpretRetainCase(buf, (char)(ch1 ^ 64));
					i++;
				} else {
					String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalControlEscape", "\\c"); //$NON-NLS-1$ //$NON-NLS-2$
					throw new PatternSyntaxException(msg, replaceText, i);
				}
				break;

			case 'x':
				if (i + 2 < length) {
					int parsedInt;
					try {
						parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 3), 16);
						if (parsedInt < 0)
							throw new NumberFormatException();
					} catch (NumberFormatException e) {
						String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, i + 3)); //$NON-NLS-1$
						throw new PatternSyntaxException(msg, replaceText, i);
					}
					interpretRetainCase(buf, (char) parsedInt);
					i+= 2;
				} else {
					String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
					throw new PatternSyntaxException(msg, replaceText, i);
				}
				break;

			case 'u':
				if (i + 4 < length) {
					int parsedInt;
					try {
						parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 5), 16);
						if (parsedInt < 0)
							throw new NumberFormatException();
					} catch (NumberFormatException e) {
						String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, i + 5)); //$NON-NLS-1$
						throw new PatternSyntaxException(msg, replaceText, i);
					}
					interpretRetainCase(buf, (char) parsedInt);
					i+= 4;
				} else {
					String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
					throw new PatternSyntaxException(msg, replaceText, i);
				}
				break;

			case 'C':
				if(foundText.toUpperCase().equals(foundText)) // is whole match upper-case?
					fRetainCaseMode= RC_UPPER;
				else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case?
					fRetainCaseMode= RC_LOWER;
				else if(Character.isUpperCase(foundText.charAt(0))) // is first character upper-case?
					fRetainCaseMode= RC_FIRSTUPPER;
				else
					fRetainCaseMode= RC_MIXED;
				break;

			default:
				// unknown escape k: append uninterpreted \k
				buf.append('\\').append(ch);
				break;
		}
		return i;
	}

	/**
	 * Converts a non-regex string to a pattern
	 * that can be used with the regex search engine.
	 *
	 * @param string the non-regex pattern
	 * @return the string converted to a regex pattern
	 */
	private String asRegPattern(String string) {
		StringBuilder out= new StringBuilder(string.length());
		boolean quoting= false;

		for (int i= 0, length= string.length(); i < length; i++) {
			char ch= string.charAt(i);
			if (ch == '\\') {
				if (quoting) {
					out.append("\\E"); //$NON-NLS-1$
					quoting= false;
				}
				out.append("\\\\"); //$NON-NLS-1$
				continue;
			}
			if (!quoting) {
				out.append("\\Q"); //$NON-NLS-1$
				quoting= true;
			}
			out.append(ch);
		}
		if (quoting)
			out.append("\\E"); //$NON-NLS-1$

		return out.toString();
	}

	/**
	 * Substitutes the previous match with the given text.
	 * Sends a DocumentEvent to all registered IDocumentListener.
	 *
	 * @param text the substitution text
	 * @param regExReplace if true text represents a regular expression
	 * @return the replace region or null if there was no match
	 * @throws BadLocationException if startOffset is an invalid document offset
	 * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation
	 * @throws PatternSyntaxException if a regular expression has invalid syntax
	 *
	 * @see DocumentEvent
	 * @see IDocumentListener
	 */
	public IRegion replace(String text, boolean regExReplace) throws BadLocationException {
		return findReplace(REPLACE, -1, null, text, false, false, false, regExReplace);
	}

	// ---------- CharSequence implementation ----------

	@Override
	public int length() {
		return fDocument.getLength();
	}

	@Override
	public char charAt(int index) {
		try {
			return fDocument.getChar(index);
		} catch (BadLocationException e) {
			throw new IndexOutOfBoundsException();
		}
	}

	@Override
	public CharSequence subSequence(int start, int end) {
		try {
			return fDocument.get(start, end - start);
		} catch (BadLocationException e) {
			throw new IndexOutOfBoundsException();
		}
	}

	@Override
	public String toString() {
		return fDocument.get();
	}

	/**
	 * Escapes special characters in the string, such that the resulting pattern
	 * matches the given string.
	 *
	 * @param string the string to escape
	 * @return a regex pattern that matches the given string
	 * @since 3.5
	 */
	public static String escapeForRegExPattern(String string) {
		//implements https://bugs.eclipse.org/bugs/show_bug.cgi?id=44422

		StringBuilder pattern= new StringBuilder(string.length() + 16);
		int length= string.length();
		for (int i= 0; i < length; i++) {
			char ch= string.charAt(i);
			switch (ch) {
				case '\\':
				case '(':
				case ')':
				case '[':
				case ']':
				case '{':
				case '}':
				case '.':
				case '?':
				case '*':
				case '+':
				case '|':
				case '^':
				case '$':
					pattern.append('\\').append(ch);
					break;

				case '\r':
					if (i + 1 < length && string.charAt(i + 1) == '\n')
						i++;
					//$FALL-THROUGH$
				case '\n':
					pattern.append("\\R"); //$NON-NLS-1$
					break;
				case '\t':
					pattern.append("\\t"); //$NON-NLS-1$
					break;
				case '\f':
					pattern.append("\\f"); //$NON-NLS-1$
					break;
				case 0x07:
					pattern.append("\\a"); //$NON-NLS-1$
					break;
				case 0x1B:
					pattern.append("\\e"); //$NON-NLS-1$
					break;

				default:
					if (0 <= ch && ch < 0x20) {
						pattern.append("\\x"); //$NON-NLS-1$
						String hexString= Integer.toHexString(ch).toUpperCase();
						if (hexString.length() == 1)
							pattern.append('0');
						pattern.append(hexString);
					} else {
						pattern.append(ch);
					}
			}
		}
		return pattern.toString();
	}
}