// org.eclipse.jdt.internal.core.search.StringOperation (Maven / Gradle / Ivy)

/*******************************************************************************
 * Copyright (c) 2000, 2010 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.jdt.internal.core.search;

import org.eclipse.jdt.core.compiler.CharOperation;
import org.eclipse.jdt.internal.compiler.parser.ScannerHelper;

/**
 * This class is a collection of helper methods to manipulate strings during search.
 */
public final class StringOperation {

	private final static int[] EMPTY_REGIONS = new int[0];

/**
 * Answers all the regions in a given name matching a given camel case pattern.
 * <p>
 * Each of these regions is made of its starting index and its length in the given
 * name. They are all concatenated in a single array of <code>int</code>
 * which therefore always has an even length.
 * </p><p>
 * Note that each region is disjoint from the following one.<br>
 * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
 * then <code>start1+length1</code> will always be smaller than
 * <code>start2</code>.
 * </p>
 * Examples:
 * <pre>
 *  pattern = "NPE"
 *  name    = NullPointerException / NoPermissionException
 *  result:   { 0, 1, 4, 1, 11, 1 } / { 0, 1, 2, 1, 12, 1 }
 *
 *  pattern = "NuPoEx"
 *  name    = NullPointerException
 *  result:   { 0, 2, 4, 2, 11, 2 }
 *
 *  pattern = "IPL3"
 *  name    = "IPerspectiveListener3"
 *  result:   { 0, 2, 12, 1, 20, 1 }
 *
 *  pattern = "HashME"
 *  name    = "HashMapEntry"
 *  result:   { 0, 5, 7, 1 }
 * </pre>
 * <p>
 * Special cases handled before any matching is attempted:
 * </p>
 * <ul>
 * <li>a <code>null</code> name never matches: <code>null</code> is returned;</li>
 * <li>a <code>null</code> pattern yields an empty array (see bug 264816);</li>
 * <li>a negative <code>patternEnd</code> (resp. <code>nameEnd</code>) is replaced
 * 	by the length of the pattern (resp. of the name);</li>
 * <li>an empty pattern range only matches an empty name range, in which case the
 * 	single region <code>{ patternStart, patternEnd-patternStart }</code> is
 * 	returned;</li>
 * <li>the first character of the name range and of the pattern range must be
 * 	strictly equal (case included), otherwise <code>null</code> is returned.</li>
 * </ul>
 *
 * @see CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean)
 * 	for more details on the camel case behavior
 * @see CharOperation#match(char[], char[], boolean) for more details on the
 * 	pattern match behavior
 *
 * @param pattern the given pattern
 * @param patternStart the start index of the pattern, inclusive
 * @param patternEnd the end index of the pattern, exclusive; a negative value
 * 	means the length of the pattern
 * @param name the given name
 * @param nameStart the start index of the name, inclusive
 * @param nameEnd the end index of the name, exclusive; a negative value
 * 	means the length of the name
 * @param samePartCount flag telling whether the pattern and the name should
 * 	have the same count of parts or not.<br>
 * 	For example:
 * 	<ul>
 * 		<li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types,
 * 				but not 'HashMapEntry'</li>
 * 		<li>'HMap' type string pattern will still match previous 'HashMap' and
 * 				'HtmlMapper' types, but not 'HighMagnitude'</li>
 * 	</ul>
 * @return an array of <code>int</code> having two slots per returned
 * 	region (first one is the starting index of the region and the second
 * 	one the length of the region).<br>
 * 	Note that it may be <code>null</code> if the given name does not match
 * 	the pattern, and that it is an empty array if the pattern is <code>null</code>
 * @since 3.5
 */
public static final int[] getCamelCaseMatchingRegions(String pattern, int patternStart, int patternEnd, String name, int nameStart, int nameEnd, boolean samePartCount) {

	/* !!!!!!!!!! WARNING !!!!!!!!!!
	 * The algorithm used in this method has been fully inspired from
	 * CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean).
	 *
	 * So, if any change needs to be applied in the algorithm, do NOT forget
	 * to backport it in the CharOperation method!
	 */

	if (name == null)
		return null; // null name cannot match
	if (pattern == null) {
		// null pattern cannot match any region
		// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
		return EMPTY_REGIONS;
	}
	// a negative end index stands for "up to the end of the string"
	if (patternEnd < 0) 	patternEnd = pattern.length();
	if (nameEnd < 0) nameEnd = name.length();

	if (patternEnd <= patternStart) {
		// empty pattern range: it can only match an empty name range
		return nameEnd <= nameStart
			? new int[] { patternStart, patternEnd-patternStart }
			: null;
	}
	// non-empty pattern range against an empty name range: no match
	if (nameEnd <= nameStart) return null;
	// check first pattern char
	if (name.charAt(nameStart) != pattern.charAt(patternStart)) {
		// first char must strictly match (upper/lower)
		return null;
	}

	char patternChar, nameChar;
	int iPattern = patternStart;
	int iName = nameStart;

	// init segments
	int parts = 1;
	// NOTE(review): the source text is truncated from the next line on. The 'for'
	// header is incomplete and runs straight into the remainder of a later comment;
	// the rest of the part-counting loop, the declarations of 'segments', 'count',
	// 'segmentStart' and 'segmentEnd', and the opening of the enclosing loops are
	// not visible here. Restore them from the reference implementation before
	// relying on this method - TODO confirm against the original file.
	for (int i=patternStart+1; i break if the digit is current pattern character otherwise consume it
					if (patternChar == nameChar) break;
					iName++;
				// nameChar is uppercase...
				} else  if (patternChar != nameChar) {
					//.. and it does not match patternChar, so it's not a match
					return null;
				} else {
					//.. and it matched patternChar. Back to the big loop
					break;
				}
			}
			// Same tests for non-obvious characters
			else if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar)) {
				// identifier part which is not uppercase: consume it
				iName++;
			} else if (Character.isDigit(nameChar)) {
				// digit: stop on it only if it is the current pattern character
				if (patternChar == nameChar) break;
				iName++;
			} else  if (patternChar != nameChar) {
				return null;
			} else {
				break;
			}
		}
		// At this point, either name has been exhausted, or it is at an uppercase letter.
		// Since pattern is also at an uppercase letter
		if (segments == null) {
			// lazily allocate two slots (start, length) per counted part
			segments = new int[parts*2];
		}
		// record the region which has just been matched, then start a new one
		// at the current name position
		segments[count++] = segmentStart;
		segments[count++] = segmentEnd - segmentStart;
		segmentStart = iName;
	}
}

/**
 * Answers all the regions in a given name matching a given pattern which may
 * contain the special characters '*' and '?' (e.g. "H*M??").
 * <p>
 * Each of these regions is made of its starting index and its length in the given
 * name. They are all concatenated in a single array of <code>int</code>
 * which therefore always has an even length.
 * </p><p>
 * Note that each region is disjoint from the following one.<br>
 * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>,
 * then <code>start1+length1</code> will always be smaller than
 * <code>start2</code>.
 * </p>
 * Examples:
 * <pre>
 *  pattern = "N???Po*Ex?eption"
 *  name    = NullPointerException
 *  result:   { 0, 1, 4, 2, 11, 2, 14, 6 }
 *
 *  pattern = "Ha*M*ent*"
 *  name    = "HashMapEntry"
 *  result:   { 0, 2, 4, 1, 7, 3 }
 * </pre>
 * <p>
 * A <code>null</code> name never matches (<code>null</code> is returned) and a
 * <code>null</code> pattern yields an empty array (see bug 264816).
 * </p>
 *
 * @see CharOperation#match(char[], char[], boolean) for more details on the
 * 	pattern match behavior
 *
 * @param pattern the given pattern
 * @param patternStart the given pattern start
 * @param patternEnd the given pattern end; a negative value means the length
 * 	of the pattern
 * @param name the given name
 * @param nameStart the given name start
 * @param nameEnd the given name end; a negative value means the length of
 * 	the name
 * @param isCaseSensitive flag to know if the matching should be case sensitive
 * @return an array of <code>int</code> having two slots per returned
 * 	region (first one is the starting index of the region and the second
 * 	one the length of the region).<br>
 * 	Note that it may be <code>null</code> if the given name does not match
 * 	the pattern, and that it is an empty array if the pattern is <code>null</code>
 * @since 3.5
 */
public static final int[] getPatternMatchingRegions(
	String pattern,
	int patternStart,
	int patternEnd,
	String name,
	int nameStart,
	int nameEnd,
	boolean isCaseSensitive) {

	/* !!!!!!!!!! WARNING !!!!!!!!!!
	 * The algorithm used in this method has been fully inspired from
	 * CharOperation#match(char[], int, int, char[], int, int, boolean).
	 *
	 * So, if any change needs to be applied in the algorithm, do NOT forget
	 * to backport it in the CharOperation method!
	 */

	if (name == null) return null; // null name cannot match
	if (pattern == null) {
		// null pattern cannot match any region
		// see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816
		return EMPTY_REGIONS;
	}
	int iPattern = patternStart;
	int iName = nameStart;

	// init segments parts
	// (a negative end index stands for "up to the end of the string")
	if (patternEnd < 0)
		patternEnd = pattern.length();
	if (nameEnd < 0)
		nameEnd = name.length();
	int questions = 0;
	int parts = 0;
	char previous = 0;
	// NOTE(review): the source text is truncated from the next line on. The 'for'
	// header is incomplete and is fused with the end of a later condition; the
	// rest of the part-counting loop, the declarations of 'segments', 'count',
	// 'start', 'segmentStart' and 'patternChar', and the 'if' matching the
	// '} else {' below are not visible here. Restore them from the reference
	// implementation before relying on this method - TODO confirm against the
	// original file.
	for (int i=patternStart; i 0 && previous != '?') {
			segments[count++] = start;
			segments[count++] = iName-start;
			start = iName;
		}
		segmentStart = ++iPattern; // skip star
	} else {
		if (iName == nameEnd) {
			// name is exhausted: answer the regions if they have all been recorded
			if (count == (parts*2)) return segments;
			int end = patternEnd;
			if (previous == '?') { // last char was a '?' => purge all trailing '?'
				while (pattern.charAt(--end-1) == '?') {
					if (end == start) {
						return new int[] { patternStart, patternEnd-patternStart };
					}
				}
			}
			return new int[] { start, end-start };
		}
		return null;
	}
	int prefixStart = iName;
	int previousCount = count;
	previous = patternChar;
	char previousSegment = patternChar;
	checkSegment : while (iName < nameEnd) {
		if (iPattern == patternEnd) {
			// pattern is exhausted while some name characters remain:
			// retry the current segment one character further in the name
			iPattern = segmentStart; // mismatch - restart current segment
			iName = ++prefixStart;
			previous = previousSegment;
			continue checkSegment;
		}
		/* segment is ending */
		if ((patternChar = pattern.charAt(iPattern)) == '*') {
			segmentStart = ++iPattern; // skip star
			if (segmentStart == patternEnd) {
				// the star was the last pattern character: record the pending
				// region (if there is still room for it) and answer the result
				if (count < (parts*2)) {
					segments[count++] = start;
					segments[count++] = iName-start;
				}
				return segments;
			}
			switch (previous) {
				case '*':
				case '?':
					// no literal region is pending after a special character
					break;
				default:
					// close the region opened by the preceding literal characters
					segments[count++] = start;
					segments[count++] = iName-start;
					break;
			}
			prefixStart = iName;
			start = prefixStart;
			previous = patternChar;
			previousSegment = patternChar;
			continue checkSegment;
		}
		/* check current name character */
		// remember the region count so that it can be rolled back on mismatch
		previousCount = count;
		if (patternChar == '?') {
			// '?' is accepted without comparing it to the current name character
			switch (previous) {
				case '*':
				case '?':
					break;
				default:
					// close the region opened by the preceding literal characters
					segments[count++] = start;
					segments[count++] = iName-start;
					break;
			}
		} else {
			boolean mismatch;
			if (isCaseSensitive) {
				mismatch = name.charAt(iName) != patternChar;
			} else {
				mismatch = ScannerHelper.toLowerCase(name.charAt(iName)) != ScannerHelper.toLowerCase(patternChar);
			}
			if (mismatch) {
				// roll back to the beginning of the current segment and retry it
				// one character further in the name
				iPattern = segmentStart; // mismatch - restart current segment
				iName = ++prefixStart;
				start = prefixStart;
				count = previousCount;
				previous = previousSegment;
				continue checkSegment;
			}
			switch (previous) {
				case '?':
					// first literal character after a '?': a new region starts here
					start = iName;
					break;
			}
		}
		iName++;
		iPattern++;
		previous = patternChar;
	}

	// The loop above has ended without returning: the result is a match when the
	// last segment started at the pattern end, when both the name and the pattern
	// are exhausted, or when the only remaining pattern character is a '*'.
	if ((segmentStart == patternEnd)
		|| (iName == nameEnd && iPattern == patternEnd)
		|| (iPattern == patternEnd - 1 && pattern.charAt(iPattern) == '*')) {
		// record the pending region if there is still room for it
		if (count < (parts*2)) {
			segments[count++] = start;
			segments[count++] = iName-start;
		}
		return segments;
	}
	return null;
}
}