All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.fife.ui.rsyntaxtextarea.modes.JavaScriptTokenMaker Maven / Gradle / Ivy

Go to download

RSyntaxTextArea is the syntax highlighting text editor for Swing applications. Features include syntax highlighting for 40+ languages, code folding, code completion, regex find and replace, macros, code templates, undo/redo, line numbering and bracket matching.

There is a newer version: 3.5.1
Show newest version
/*
 * 10/08/2004
 *
 * JavaScriptTokenMaker.java - An object that can take a chunk of text and
 * return a linked list of Tokens representing it in the
 * JavaScript programming language.
 * Copyright (C) 2004 Robert Futrell
 * robert_futrell at users.sourceforge.net
 * http://fifesoft.com/rsyntaxtextarea
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * A token maker that turns text into a linked list of Tokens
 * for syntax highlighting in the JavaScript programming language.
 *
 * @author Robert Futrell
 * @version 0.1
 */
public class JavaScriptTokenMaker extends AbstractTokenMaker {

	protected final String operators = "+-*/%!=<>^&|?:";

	protected final String separators = "()[]{}";

	protected final String separators2 = ".,;";	// Characters you don't want syntax highlighted but separate identifiers.

	protected final String hexCharacters = "0123456789ABCDEFabcdef";

	protected final String numberEndChars = "FfLl"; // Characters used to specify literal number types.

	private int currentTokenStart;
	private int currentTokenType;


	/**
	 * Constructor.
	 */
	public JavaScriptTokenMaker() {
		super();	// Initializes tokensToHighlight.
	}


	/**
	 * Checks the token to give it the exact ID it deserves before
	 * being passed up to the super method.
	 *
	 * @param segment Segment to get text from.
	 * @param start Start offset in segment of token.
	 * @param end End offset in segment of token.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which the token occurs.
	 */
	public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) {

		switch (tokenType) {
			// Since reserved words, functions, and data types are all passed
			// into here as "identifiers," we have to see what the token
			// really is...
			case Token.IDENTIFIER:
				int value = wordsToHighlight.get(segment, start,end);
				if (value!=-1)
					tokenType = value;
				break;
			case Token.WHITESPACE:
			case Token.SEPARATOR:
			case Token.OPERATOR:
			case Token.ERROR_IDENTIFIER:
			case Token.ERROR_NUMBER_FORMAT:
			case Token.ERROR_STRING_DOUBLE:
			case Token.ERROR_CHAR:
			case Token.COMMENT_EOL:
			case Token.COMMENT_MULTILINE:
			case Token.LITERAL_BOOLEAN:
			case Token.LITERAL_NUMBER_DECIMAL_INT:
			case Token.LITERAL_NUMBER_FLOAT:
			case Token.LITERAL_NUMBER_HEXADECIMAL:
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
			case Token.LITERAL_CHAR:
				break;

			default:
				new Exception("Unknown tokenType: '" + tokenType + "'").
												printStackTrace();
				tokenType = Token.IDENTIFIER;
				break;

		}

		super.addToken(segment, start, end, tokenType, startOffset);

	}


	/**
	 * Returns the text to place at the beginning and end of a
	 * line to "comment" it in a this programming language.
	 *
	 * @return The start and end strings to add to a line to "comment"
	 *         it out.
	 */
	public String[] getLineCommentStartAndEnd() {
		return new String[] { "//", null };
	}


	/**
	 * Returns the words to highlight for the JavaScript programming language.
	 *
	 * @return A TokenMap containing the words to highlight for
	 *         the JavaScript programming language.
	 * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight
	 */
	public TokenMap getWordsToHighlight() {

		TokenMap tokenMap = new TokenMap(52);

		int reservedWord = Token.RESERVED_WORD;
		tokenMap.put("abstract",						reservedWord);
		tokenMap.put("as",							reservedWord);
		tokenMap.put("break",						reservedWord);
		tokenMap.put("case",						reservedWord);
		tokenMap.put("catch",						reservedWord);
		tokenMap.put("class",						reservedWord);
		tokenMap.put("const",						reservedWord);
		tokenMap.put("continue",						reservedWord);
		tokenMap.put("debugger",						reservedWord);
		tokenMap.put("default",						reservedWord);
		tokenMap.put("delete",						reservedWord);
		tokenMap.put("do",							reservedWord);
		tokenMap.put("else",						reservedWord);
		tokenMap.put("enum",						reservedWord);
		tokenMap.put("export",						reservedWord);
		tokenMap.put("extends",						reservedWord);
		tokenMap.put("final",						reservedWord);
		tokenMap.put("finally",						reservedWord);
		tokenMap.put("for",							reservedWord);
		tokenMap.put("function",						reservedWord);
		tokenMap.put("goto",						reservedWord);
		tokenMap.put("if",							reservedWord);
		tokenMap.put("implements",					reservedWord);
		tokenMap.put("import",						reservedWord);
		tokenMap.put("in",							reservedWord);
		tokenMap.put("instanceof",					reservedWord);
		tokenMap.put("interface",					reservedWord);
		tokenMap.put("item",						reservedWord);
		tokenMap.put("namespace",					reservedWord);
		tokenMap.put("native",						reservedWord);
		tokenMap.put("new",							reservedWord);
		tokenMap.put("null",						reservedWord);
		tokenMap.put("package",						reservedWord);
		tokenMap.put("private",						reservedWord);
		tokenMap.put("protected",					reservedWord);
		tokenMap.put("public",						reservedWord);
		tokenMap.put("return",						reservedWord);
		tokenMap.put("static",						reservedWord);
		tokenMap.put("super",						reservedWord);
		tokenMap.put("switch",						reservedWord);
		tokenMap.put("synchronized",					reservedWord);
		tokenMap.put("this",						reservedWord);
		tokenMap.put("throw",						reservedWord);
		tokenMap.put("throws",						reservedWord);
		tokenMap.put("transient",					reservedWord);
		tokenMap.put("try",							reservedWord);
		tokenMap.put("typeof",						reservedWord);
		tokenMap.put("var",							reservedWord);
		tokenMap.put("void",						reservedWord);
		tokenMap.put("while",						reservedWord);
		tokenMap.put("with",						reservedWord);

		int literalBoolean = Token.LITERAL_BOOLEAN;
		tokenMap.put("false",						literalBoolean);
		tokenMap.put("true",						literalBoolean);

		int dataType = Token.DATA_TYPE;
		tokenMap.put("boolean",						dataType);
		tokenMap.put("byte",						dataType);
		tokenMap.put("char",						dataType);
		tokenMap.put("double",						dataType);
		tokenMap.put("float",						dataType);
		tokenMap.put("int",							dataType);
		tokenMap.put("long",						dataType);
		tokenMap.put("short",						dataType);

		return tokenMap;

	}


	/**
	 * Returns true always as JavaScript uses curly braces
	 * to denote code blocks.
	 *
	 * @return true always.
	 */
	public boolean getCurlyBracesDenoteCodeBlocks() {
		return true;
	}


	/**
	 * Returns true if the specified token is a '{' or
	 * '(' separator.
	 *
	 * @param t The token the previous line ends with.
	 * @return Whether the next line should be indented after this token.
	 */
	public boolean getShouldIndentNextLineAfter(Token t) {
		if (t!=null && t.textCount==1) {
			char ch = t.text[t.textOffset];
			return ch=='{' || ch=='(';
		}
		return false;
	}


	/**
	 * Returns the first token in the linked list of tokens generated
	 * from text.  This method must be implemented by
	 * subclasses so they can correctly implement syntax highlighting.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with.
	 * @param startOffset The offset into the document at which
	 *        text starts.
	 * @return The first Token in a linked list representing
	 *         the syntax highlighted text.
	 */
	public Token getTokenList(Segment text, int initialTokenType,
												int startOffset) {


		resetTokenList();

		char[] array = text.array;
		int offset = text.offset;
		int count = text.count;
		int end = offset + count;

		// See, when we find a token, its starting position is always of the
		// form: 'startOffset + (currentTokenStart-offset)'; but since
		// startOffset and offset are constant, tokens' starting positions
		// become: 'newStartOffset+currentTokenStart' for one less subtraction
		// operation.
		int newStartOffset = startOffset - offset;

		currentTokenStart = offset;
		currentTokenType  = initialTokenType;
		boolean backslash = false;
		boolean numContainsExponent = false;
		boolean numContainsEndCharacter = false;

		for (int i=offset; i call '"' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.ERROR_STRING_DOUBLE;
							}
							break;

						case '\'':
							if (backslash) { // Escaped single quote => call '\'' an identifier.
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.ERROR_CHAR;
							}
							break;

						case '\\':
							addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenType = Token.NULL;
							backslash = !backslash;
							break;

						default:
							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.ERROR_IDENTIFIER;
								break;
							}

					} // End of switch (c).

					break;

				case Token.WHITESPACE:

					switch (c) {

						case ' ':
						case '\t':
							break;	// Still whitespace.

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true; // Previous char whitespace => this must be first backslash.
							break;

						case '"': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						default:	// Add the whitespace token and start anew.

							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;

							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.ERROR_IDENTIFIER;
							}

					} // End of switch (c).

					break;

				default: // Should never happen
				case Token.IDENTIFIER:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						default:
							if (RSyntaxUtilities.isLetterOrDigit(c) || c=='_') {
								break;	// Still an identifier of some type.
							}
							int indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.ERROR_IDENTIFIER;
							}

					} // End of switch (c).

					break;

				case Token.LITERAL_NUMBER_DECIMAL_INT:

					// Reset our boolean states if we only have one digit char before
					// the current one.
					if (currentTokenStart==i-1) {
						numContainsExponent = false;
						numContainsEndCharacter = false;
					}

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						default:

							if (RSyntaxUtilities.isDigit(c)) {
								break;	// Still a literal number.
							}
							else if (c=='e') {	// Exponent.
								if (numContainsExponent==false) {
									numContainsExponent = true;
								}
								else {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								break;
							}
							else if (c=='.') { // Decimal point.
								if (numContainsExponent==true) {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								else {
									currentTokenType = Token.LITERAL_NUMBER_FLOAT;
								}
								break;
							}
							int indexOf = numberEndChars.indexOf(c);
							if (indexOf>-1) {	// Numbers can end in 'f','F','l','L', etc.
								if (numContainsEndCharacter==true) {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								else {
									numContainsEndCharacter = true;
								}
								break;
							}
							indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}

							// Otherwise, the token is an error.
							currentTokenType = Token.ERROR_NUMBER_FORMAT;

					} // End of switch (c).

					break;

				case Token.LITERAL_NUMBER_FLOAT:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						default:

							if (RSyntaxUtilities.isDigit(c)) {
								break;	// Still a literal number.
							}
							else if (c=='e') {	// Exponent.
								if (numContainsExponent==false) {
									numContainsExponent = true;
								}
								else {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								break;
							}
							else if (c=='.') { // Second decimal point; must catch now because it's a "separator" below.
								currentTokenType = Token.ERROR_NUMBER_FORMAT;
								break;
							}
							int indexOf = numberEndChars.indexOf(c);
							if (indexOf>-1) {	// Numbers can end in 'f','F','l','L', etc.
								if (numContainsEndCharacter==true) {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								else {
									numContainsEndCharacter = true;
								}
								break;
							}
							indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_FLOAT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}

							// Otherwise, the token is an error.
							currentTokenType = Token.ERROR_NUMBER_FORMAT;

					} // End of switch (c).

					break;

				case Token.LITERAL_NUMBER_HEXADECIMAL:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						default:

							if (c=='e') {	// Exponent.
								if (numContainsExponent==false) {
									numContainsExponent = true;
								}
								else {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								break;
							}
							int indexOf = hexCharacters.indexOf(c);
							if (indexOf>-1) {
								break;	// Still a hexadecimal number.
							}
							indexOf = numberEndChars.indexOf(c);
							if (indexOf>-1) {	// Numbers can end in 'f','F','l','L', etc.
								if (numContainsEndCharacter==true) {
									currentTokenType = Token.ERROR_NUMBER_FORMAT;
								}
								else {
									numContainsEndCharacter = true;
								}
								break;
							}
							indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.OPERATOR;
								break;
							}
							indexOf = separators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}

							// Otherwise, the token is an error.
							currentTokenType = Token.ERROR_NUMBER_FORMAT;

					} // End of switch (c).

					break;

				case Token.COMMENT_MULTILINE:

					// Find either end of MLC or end of the current line.
					while (i < end-1) {
						if (array[i]=='*' && array[i+1]=='/') {
							addToken(text, currentTokenStart,i+1, Token.COMMENT_MULTILINE, newStartOffset+currentTokenStart);
							i = i + 1;
							currentTokenType = Token.NULL;
							backslash = false; // Backslashes can't accumulate before and after a comment...
							break;
						}
						i++;
					}

					break;

				case Token.COMMENT_EOL:
					i = end - 1;
					addToken(text, currentTokenStart,i, Token.COMMENT_EOL, newStartOffset+currentTokenStart);
					// We need to set token type to null so at the bottom we don't add one more token.
					currentTokenType = Token.NULL;
					break;

				// We need this state because comments always start with '/', which is an operator.
				// Note that when we enter this state, the PREVIOUS character was an operator.
				case Token.OPERATOR:

					if (array[i-1]=='/') { // Possibility of comments.

						if (c=='*') {
							currentTokenType = Token.COMMENT_MULTILINE;
							break;
						}

						else if (c=='/') {
							currentTokenType = Token.COMMENT_EOL;
							i = end - 1;	// Since we know the rest of the line is in this token.
						}

						else {
							// We MUST add the token at the previous char now; if we don't and let
							// operators accumulate before we print them, we will mess up syntax
							// highlighting if we get an end-of-line comment.
							addToken(text, currentTokenStart,i-1, Token.OPERATOR, newStartOffset+currentTokenStart);
							currentTokenType = Token.NULL;
							i = i - 1;
						}

					}

					else {

						addToken(text, currentTokenStart,i-1, Token.OPERATOR, newStartOffset+currentTokenStart);

						// Hack to keep code size down...
						i--;
						currentTokenType = Token.NULL;

					}

					break;

				case Token.ERROR_IDENTIFIER:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_STRING_DOUBLE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.ERROR_CHAR;
							backslash = false;
							break;

						case ';':
							addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true; // Must be first backslash in a row since previous character is identifier char.
							break;

						default:
							int indexOf = operators.indexOf(c);
							if (indexOf>-1) {
									addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
									currentTokenStart = i;
									currentTokenType = Token.OPERATOR;
							}
							indexOf = separators.indexOf(c);
							if (indexOf>-1) {
									addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
									addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
									currentTokenType = Token.NULL;
							}
							indexOf = separators2.indexOf(c);
							if (indexOf>-1) {
									addToken(text, currentTokenStart,i-1, Token.ERROR_IDENTIFIER, newStartOffset+currentTokenStart);
									addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
									currentTokenType = Token.NULL;
							}

					} // End of switch (c).

					break;

				case Token.ERROR_NUMBER_FORMAT:

						switch (c) {

							case ' ':
							case '\t':
								addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.WHITESPACE;
								break;

							case '"': // Don't need to worry about backslashes as previous char is non-backslash.
								addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.ERROR_STRING_DOUBLE;
								backslash = false;
								break;

							case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
								addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
								currentTokenStart = i;
								currentTokenType = Token.ERROR_CHAR;
								backslash = false;
								break;

							case ';':
								addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;

							case '\\':
								addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								backslash = true; // Must be first backslash in a row since previous char is a number char.
								break;

							default:

								// Could be going into hexadecimal.
								int indexOf = hexCharacters.indexOf(c);
								if (indexOf>-1 && (i-currentTokenStart==2 && array[i-1]=='x' && array[i-2]=='0')) {
									currentTokenType = Token.LITERAL_NUMBER_HEXADECIMAL;
									break;
								}

								indexOf = operators.indexOf(c);
								if (indexOf>-1) {
										addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
										currentTokenStart = i;
										currentTokenType = Token.OPERATOR;
								}
								indexOf = separators.indexOf(c);
								if (indexOf>-1) {
										addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
										addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
										currentTokenType = Token.NULL;
								}
								indexOf = separators2.indexOf(c);
								if (indexOf>-1) {
										addToken(text, currentTokenStart,i-1, Token.ERROR_NUMBER_FORMAT, newStartOffset+currentTokenStart);
										addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
										currentTokenType = Token.NULL;
								}

						} // End of switch (c).

						break;

				case Token.ERROR_CHAR:

						if (c=='\\') {
							backslash = !backslash; // Okay because if we got in here, backslash was initially false.
						}
						else {

							if (c=='\'' && !backslash) {
								addToken(text, currentTokenStart,i, Token.LITERAL_CHAR, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								// backslash is definitely false when we leave.
							}

							backslash = false; // Need to set backslash to false here as a character was typed.

						}
						// Otherwise, we're still an unclosed char...

						break;

				case Token.ERROR_STRING_DOUBLE:

						if (c=='\\') {
							backslash = !backslash; // Okay because if we got in here, backslash was initially false.
						}
						else {
							if (c=='"' && !backslash) {
								addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								// backslash is definitely false when we leave.
							}

							backslash = false; // Need to set backslash to false here as a character was typed.

						}
						// Otherwise, we're still an unclosed string...

						break;

			} // End of switch (currentTokenType).

		} // End of for (int i=offset; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy