All downloads are free. Search and download functionality uses the official Maven repository.

org.springframework.expression.spel.standard.Tokenizer Maven / Gradle / Ivy

/*
 * Copyright 2002-2009 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.expression.spel.standard;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.springframework.expression.spel.InternalParseException;
import org.springframework.expression.spel.SpelMessage;
import org.springframework.expression.spel.SpelParseException;
import org.springframework.util.Assert;

/**
 * Lex some input data into a stream of tokens that can then be parsed.
 *  
 * @author Andy Clement
 * @since 3.0
 */
class Tokenizer {
	
	/** The expression text as supplied by the caller; passed along when parse exceptions are built. */
	String expressionString;

	/** The characters of the expression followed by a single trailing NUL sentinel. */
	char[] toProcess;

	/** Index into {@link #toProcess} of the character currently being examined. */
	int pos;

	/** Length of {@link #toProcess}, i.e. the expression length plus one for the sentinel. */
	int max;

	/** Tokens produced so far, in the order they were lexed. Every element is a {@code Token}. */
	List<Token> tokens = new ArrayList<Token>();
		
	/**
	 * Create a tokenizer for the given expression text and tokenize it straight away.
	 * <p>A single NUL character is appended to a private copy of the input; the
	 * lexing routines use it as an end-of-input sentinel.
	 * @param inputdata the expression text to tokenize
	 */
	public Tokenizer(String inputdata) {
		String withSentinel = inputdata+"\0";
		this.expressionString = inputdata;
		this.toProcess = withSentinel.toCharArray();
		this.max = this.toProcess.length;
		this.pos = 0;
		process();
	}
	
	/**
	 * Main lexing loop: examines the character at the current position and either
	 * pushes a token or delegates to one of the lex routines.
	 * <p>Cases visible in this copy of the method:
	 * <ul>
	 * <li>a two-character GE token is pushed when the following character completes
	 * it, otherwise a one-character GT token;</li>
	 * <li>likewise LE versus LT for the less-than character;</li>
	 * <li>a decimal digit starts a numeric literal (the callee is told whether the
	 * first digit is a zero);</li>
	 * <li>space, tab, carriage return and newline are skipped;</li>
	 * <li>a single quote or a double quote starts the corresponding string literal;</li>
	 * <li>the NUL sentinel just advances the position;</li>
	 * <li>any other character raises an IllegalStateException.</li>
	 * </ul>
	 * NOTE(review): the loop header and the leading switch cases appear to be missing
	 * from this copy of the file (the text jumps from the start of the while condition
	 * straight to the tail of a case label). Restore them from the upstream source
	 * before relying on or modifying this method.
	 */
	public void process() {
		while (pos':
					if (isTwoCharToken(TokenKind.GE)) {
						pushPairToken(TokenKind.GE);
					} else {
						pushCharToken(TokenKind.GT);
					}
					break;
				case '<':
					if (isTwoCharToken(TokenKind.LE)) {
						pushPairToken(TokenKind.LE);
					} else {
						pushCharToken(TokenKind.LT);
					}
					break;
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					lexNumericLiteral(ch=='0');
					break;
				case ' ':
				case '\t':
				case '\r':
				case '\n':
					// drift over white space
					pos++;
					break;
				case '\'':
					lexQuotedStringLiteral();
					break;
				case '"':
					lexDoubleQuotedStringLiteral();
					break;
				case 0:
					// hit sentinel at end of value
					pos++; // will take us to the end
					break;
				default:
					throw new IllegalStateException("Cannot handle ("+Integer.valueOf(ch)+") '"+ch+"'");
				}
			}
		}
	}
	
	/**
	 * Return the tokens lexed from the input.
	 * <p>The returned list is the tokenizer's own list, not a copy.
	 * @return the tokens, in the order they were lexed
	 */
	public List<Token> getTokens() {
		return tokens;
	}
		
	
	// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
	/**
	 * Lex a single-quoted string literal whose opening quote is at the current position.
	 * Two consecutive single quotes inside the literal do not end it. The pushed token
	 * spans from the opening quote up to and including the closing quote.
	 * @throws InternalParseException if the end-of-input sentinel is reached before a closing quote
	 */
	private void lexQuotedStringLiteral() {
		final int start = pos;
		for (;;) {
			char current = toProcess[++pos];
			if (current == 0) {
				throw new InternalParseException(new SpelParseException(expressionString,start,SpelMessage.NON_TERMINATING_QUOTED_STRING));
			}
			if (current != '\'') {
				continue;
			}
			if (toProcess[pos+1] != '\'') {
				// a lone quote closes the literal
				break;
			}
			// doubled quote: step over the second one and keep scanning
			pos++;
		}
		pos++;
		tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start,pos), start, pos));
	}
	
	// DQ_STRING_LITERAL:	'"'! (~'"')* '"'!;
	/**
	 * Lex a double-quoted string literal whose opening quote is at the current position.
	 * The first following double quote ends the literal. The pushed token spans from the
	 * opening quote up to and including the closing quote.
	 * @throws InternalParseException if the end-of-input sentinel is reached before a closing quote
	 */
	private void lexDoubleQuotedStringLiteral() {
		final int start = pos;
		char current;
		do {
			current = toProcess[++pos];
			if (current == 0) {
				throw new InternalParseException(new SpelParseException(expressionString,start,SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
			}
		} while (current != '"');
		pos++;
		tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start,pos), start, pos));
	}
	
	
//	REAL_LITERAL :	
//	  ('.' (DECIMAL_DIGIT)+ (EXPONENT_PART)? (REAL_TYPE_SUFFIX)?) |
//		((DECIMAL_DIGIT)+ '.' (DECIMAL_DIGIT)+ (EXPONENT_PART)? (REAL_TYPE_SUFFIX)?) |
//		((DECIMAL_DIGIT)+ (EXPONENT_PART) (REAL_TYPE_SUFFIX)?) |
//		((DECIMAL_DIGIT)+ (REAL_TYPE_SUFFIX));
//	fragment INTEGER_TYPE_SUFFIX : ( 'L' | 'l' );
//	fragment HEX_DIGIT : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'|'A'|'B'|'C'|'D'|'E'|'F'|'a'|'b'|'c'|'d'|'e'|'f';		
//		
//	fragment EXPONENT_PART : 'e'  (SIGN)*  (DECIMAL_DIGIT)+ | 'E'  (SIGN)*  (DECIMAL_DIGIT)+ ;	
//	fragment SIGN :	'+' | '-' ;
//	fragment REAL_TYPE_SUFFIX : 'F' | 'f' | 'D' | 'd';
//	INTEGER_LITERAL
//	: (DECIMAL_DIGIT)+ (INTEGER_TYPE_SUFFIX)?;		
	
	/**
	 * Lex a numeric literal that starts at the current position and push the matching
	 * token: hexadecimal int/long, decimal int/long, or real (double/float).
	 * <p>The caller invokes this only when the current character is a decimal digit.
	 * <p>In the exponent part only actual digits are consumed. An exponent marker that is
	 * not followed by any digit (for example {@code 1e} or {@code 1e+}) therefore no longer
	 * steps past the end-of-input sentinel, and a non-digit after the marker is no longer
	 * swallowed into the number; the text lexed so far is pushed as a real token as-is.
	 * @param firstCharIsZero whether the digit at the current position is {@code '0'},
	 * which is required for the {@code 0x}/{@code 0X} hexadecimal prefix
	 * @throws InternalParseException if a real number carries a long suffix, or if a
	 * hexadecimal prefix is not followed by any hexadecimal digit
	 */
	private void lexNumericLiteral(boolean firstCharIsZero) {
		boolean isReal = false;
		int start = pos;
		char ch = toProcess[pos+1];
		boolean isHex = ch=='x' || ch=='X';

		// deal with hexadecimal
		if (firstCharIsZero && isHex) {
			pos=pos+1; // step onto the 'x'/'X'
			do {
				pos++;
			} while (isHexadecimalDigit(toProcess[pos]));
			// token data excludes the leading "0x" and any trailing long suffix
			if (isChar('L','l')) {
				pushHexIntToken(subarray(start+2,pos),true, start, pos);
				pos++;
			} else {
				pushHexIntToken(subarray(start+2,pos),false, start, pos);
			}
			return;
		}

		// real numbers must have leading digits

		// Consume first part of number (the character at 'start' is known to be a digit)
		do {
			pos++;
		} while (isDigit(toProcess[pos]));

		// a '.' indicates this number is a real
		ch = toProcess[pos];
		if (ch=='.') {
			isReal = true;
			// carry on consuming digits
			do {
				pos++;
			} while (isDigit(toProcess[pos]));
		}

		int endOfNumber = pos;

		// Now there may or may not be an exponent

		// is it a long ?
		if (isChar('L','l')) {
			if (isReal) { // 3.4L - not allowed
				throw new InternalParseException(new SpelParseException(expressionString,start,SpelMessage.REAL_CANNOT_BE_LONG));
			}
			// the suffix itself is not part of the token data or its range
			pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
			pos++;
		} else if (isExponentChar(toProcess[pos])) {
			// an exponent always makes the literal a real
			pos++; // step over the 'e'/'E'
			if (isSign(toProcess[pos])) {
				pos++;
			}

			// exponent digits: advance only while the current character really is a
			// digit, so the sentinel (or any other non-digit) is never skipped
			while (isDigit(toProcess[pos])) {
				pos++;
			}
			boolean isFloat = false;
			if (isFloatSuffix(toProcess[pos])) {
				isFloat = true;
				pos++;
			} else if (isDoubleSuffix(toProcess[pos])) {
				pos++;
			}
			// here the suffix, if any, is included in the token data
			pushRealToken(subarray(start,pos), isFloat, start, pos);
		} else {
			ch = toProcess[pos];
			boolean isFloat = false;
			if (isFloatSuffix(ch)) {
				isReal = true;
				isFloat = true;
				endOfNumber = ++pos;
			} else if (isDoubleSuffix(ch)) {
				isReal = true;
				endOfNumber = ++pos;
			}
			if (isReal) {
				pushRealToken(subarray(start,endOfNumber), isFloat, start, endOfNumber);
			} else {
				pushIntToken(subarray(start,endOfNumber), false, start, endOfNumber);
			}
		}
	}
	
	// Textual spellings of operators, in upper case. If this is changed, it must remain
	// sorted: lexIdentifier() looks names up with Arrays.binarySearch.
	private static final String[] alternativeOperatorNames = { "DIV","EQ","GE","GT","LE","LT","MOD","NE","NOT"};

	/**
	 * Lex an identifier starting at the current position and push either an operator
	 * token (when the text is one of the {@link #alternativeOperatorNames}, compared
	 * case-insensitively) or an IDENTIFIER token.
	 * <p>The case-insensitive comparison upper-cases the text with a fixed locale.
	 * With the default locale the result would depend on the machine's settings; for
	 * example under a Turkish locale "div" upper-cases to a string with a dotted
	 * capital I and would not be recognised as the DIV operator.
	 */
	private void lexIdentifier() {
		int start = pos;
		do {
			pos++;
		} while (isIdentifier(toProcess[pos]));
		char[] subarray = subarray(start,pos);

		// Check if this is the alternative (textual) representation of an operator (see alternativeOperatorNames)
		// All such names are two or three characters long, so other lengths skip the lookup.
		if ((pos-start)==2 || (pos-start)==3) {
			String asString = new String(subarray).toUpperCase(Locale.ENGLISH);
			int idx = Arrays.binarySearch(alternativeOperatorNames,asString);
			if (idx>=0) {
				pushOneCharOrTwoCharToken(TokenKind.valueOf(asString),start);
				return;
			}
		}
		tokens.add(new Token(TokenKind.IDENTIFIER,subarray,start,pos));
	}
	
	/**
	 * Push a decimal integer token: LITERAL_LONG when {@code isLong} is set, otherwise LITERAL_INT.
	 * @param data the characters of the number
	 * @param isLong whether the literal carried a long suffix
	 * @param start start position of the token
	 * @param end end position of the token
	 */
	private void pushIntToken(char[] data,boolean isLong, int start, int end) {
		TokenKind kind = isLong ? TokenKind.LITERAL_LONG : TokenKind.LITERAL_INT;
		tokens.add(new Token(kind, data, start, end));
	}

	/**
	 * Push a hexadecimal integer token: LITERAL_HEXLONG when {@code isLong} is set,
	 * otherwise LITERAL_HEXINT.
	 * @param data the hexadecimal digits (without the prefix)
	 * @param isLong whether the literal carried a long suffix
	 * @param start start position of the literal in the expression
	 * @param end end position of the literal in the expression
	 * @throws InternalParseException if {@code data} is empty, i.e. no digits followed the prefix
	 */
	private void pushHexIntToken(char[] data,boolean isLong, int start, int end) {
		if (data.length==0) {
			// for a long, the reported text extends one character further (end+1)
			SpelMessage problem = isLong ? SpelMessage.NOT_A_LONG : SpelMessage.NOT_AN_INTEGER;
			int reportedEnd = isLong ? end+1 : end;
			throw new InternalParseException(new SpelParseException(expressionString,start,problem,expressionString.substring(start,reportedEnd)));
		}
		TokenKind kind = isLong ? TokenKind.LITERAL_HEXLONG : TokenKind.LITERAL_HEXINT;
		tokens.add(new Token(kind, data, start, end));
	}
	
	/**
	 * Push a real-number token: LITERAL_REAL_FLOAT when {@code isFloat} is set, otherwise LITERAL_REAL.
	 * @param data the characters of the number
	 * @param isFloat whether the literal carried a float suffix
	 * @param start start position of the token
	 * @param end end position of the token
	 */
	private void pushRealToken(char[] data, boolean isFloat, int start, int end) {
		TokenKind kind = isFloat ? TokenKind.LITERAL_REAL_FLOAT : TokenKind.LITERAL_REAL;
		tokens.add(new Token(kind, data, start, end));
	}
	
	/**
	 * Copy a slice of the input characters into a new array.
	 * @param start index of the first character to copy (inclusive)
	 * @param end index just past the last character to copy (exclusive)
	 * @return a new array holding the {@code end - start} copied characters
	 */
	private char[] subarray(int start, int end) {
		int length = end - start;
		char[] copy = new char[length];
		System.arraycopy(toProcess, start, copy, 0, length);
		return copy;
	}
	
	/**
	 * Check whether the input at the current position spells out the given
	 * two-character token. The kind must consist of exactly two characters and its
	 * first character must already match the current one; only the second is tested.
	 * @param kind the two-character token kind to test for
	 * @return true if the next input character equals the kind's second character
	 */
	private boolean isTwoCharToken(TokenKind kind) {
		char[] expected = kind.tokenChars;
		Assert.isTrue(expected.length == 2);
		Assert.isTrue(toProcess[pos] == expected[0]);
		return toProcess[pos+1] == expected[1];
	}
	
	/**
	 * Push a one-character token of the given kind at the current position, then
	 * advance past it.
	 * @param kind the kind of token to push
	 */
	private void pushCharToken(TokenKind kind) {
		int end = pos + 1;
		tokens.add(new Token(kind, pos, end));
		pos = end;
	}
	
	/**
	 * Push a two-character token of the given kind at the current position, then
	 * advance past both characters.
	 * @param kind the kind of token to push
	 */
	private void pushPairToken(TokenKind kind) {
		int end = pos + 2;
		tokens.add(new Token(kind, pos, end));
		pos = end;
	}
	
	/**
	 * Push a token of the given kind starting at an explicit position; its end is the
	 * start plus the length reported by the kind. The tokenizer's own position is
	 * not changed (the parameter shadows the field of the same name).
	 * @param kind the kind of token to push
	 * @param pos the start position of the token
	 */
	private void pushOneCharOrTwoCharToken(TokenKind kind, int pos) {
		int end = pos + kind.getLength();
		tokens.add(new Token(kind, pos, end));
	}

	//	ID:	('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'$'|'0'..'9'|DOT_ESCAPED)*;
	/**
	 * Test whether a character may appear in an identifier: an ASCII letter, a
	 * decimal digit, an underscore or a dollar sign.
	 */
	private boolean isIdentifier(char ch) {
		if (ch=='_' || ch=='$') {
			return true;
		}
		return isAlphabetic(ch) || isDigit(ch);
	}
	
	/**
	 * Test whether the character at the current position is one of the two given characters.
	 */
	private boolean isChar(char a,char b) {
		char current = toProcess[pos];
		if (current == a) {
			return true;
		}
		return current == b;
	}

	/** Test whether the character introduces an exponent: {@code e} or {@code E}. */
	private boolean isExponentChar(char ch) {
		switch (ch) {
			case 'e':
			case 'E':
				return true;
			default:
				return false;
		}
	}

	/** Test whether the character is a float suffix: {@code f} or {@code F}. */
	private boolean isFloatSuffix(char ch) {
		switch (ch) {
			case 'f':
			case 'F':
				return true;
			default:
				return false;
		}
	}

	/** Test whether the character is a double suffix: {@code d} or {@code D}. */
	private boolean isDoubleSuffix(char ch) {
		switch (ch) {
			case 'd':
			case 'D':
				return true;
			default:
				return false;
		}
	}

	/** Test whether the character is a sign: {@code +} or {@code -}. */
	private boolean isSign(char ch) {
		switch (ch) {
			case '+':
			case '-':
				return true;
			default:
				return false;
		}
	}
	
	/**
	 * Test whether the character is flagged as a decimal digit in the lookup table.
	 * Characters above 255 are outside the table and are never digits.
	 */
	private boolean isDigit(char ch) {
		return ch<=255 && (flags[ch] & IS_DIGIT)!=0;
	}

	/**
	 * Test whether the character is flagged as a letter in the lookup table.
	 * Characters above 255 are outside the table and are never letters.
	 */
	private boolean isAlphabetic(char ch) {
		return ch<=255 && (flags[ch] & IS_ALPHA)!=0;
	}
	
	/**
	 * Test whether the character is flagged as a hexadecimal digit in the lookup table.
	 * Characters above 255 are outside the table and are never hexadecimal digits.
	 */
	private boolean isHexadecimalDigit(char ch) {
		return ch<=255 && (flags[ch] & IS_HEXDIGIT)!=0;
	}
	
	// Bit masks used in the character classification table below.
	private static final byte IS_DIGIT = 0x01;
	private static final byte IS_HEXDIGIT = 0x02;
	private static final byte IS_ALPHA = 0x04;

	// Classification table indexed by character code (0..255); each entry is a
	// combination of the masks above.
	private static final byte[] flags = new byte[256];

	static {
		// '0'..'9' are both decimal and hexadecimal digits
		for (char digit = '0'; digit <= '9'; digit++) {
			flags[digit] |= IS_DIGIT | IS_HEXDIGIT;
		}
		// 'a'..'f' and 'A'..'F' are hexadecimal digits
		for (char lower = 'a', upper = 'A'; lower <= 'f'; lower++, upper++) {
			flags[upper] |= IS_HEXDIGIT;
			flags[lower] |= IS_HEXDIGIT;
		}
		// 'a'..'z' and 'A'..'Z' are letters
		for (char lower = 'a', upper = 'A'; lower <= 'z'; lower++, upper++) {
			flags[upper] |= IS_ALPHA;
			flags[lower] |= IS_ALPHA;
		}
	}
	

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy