org.fife.ui.rsyntaxtextarea.modes.XMLTokenMaker.flex Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of rsyntaxtextarea Show documentation
RSyntaxTextArea is the syntax highlighting text editor for Swing applications. Features include syntax highlighting for 40+ languages, code folding, code completion, regex find and replace, macros, code templates, undo/redo, line numbering and bracket matching.
There is a newer version: 3.5.2
Show newest version
/*
 * 01/24/2005
 *
 * XMLTokenMaker.java - Generates tokens for XML syntax highlighting.
 * 
 * This library is distributed under a modified BSD license.  See the included
 * LICENSE file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * Scanner for XML.
 *
 * This implementation was created using
 * JFlex 1.4.1; however, the generated file
 * was modified for performance.  Memory allocation needs to be almost
 * completely removed to be competitive with the handwritten lexers (subclasses
 * of AbstractTokenMaker), so this class has been modified so that
 * Strings are never allocated (via yytext()), and the scanner never has to
 * worry about refilling its buffer (needlessly copying chars around).
 * We can achieve this because RText always scans exactly 1 line of tokens at a
 * time, and hands the scanner this line as an array of characters (a Segment
 * really).  Since tokens contain pointers to char arrays instead of Strings
 * holding their contents, there is no need for allocating new memory for
 * Strings.
 *
 * The actual algorithm generated for scanning has, of course, not been
 * modified.

 *
 * If you wish to regenerate this file yourself, keep in mind the following:
 * 

 *   The generated XMLTokenMaker.java file will contain two
 *       definitions of both zzRefill and yyreset.
 *       You should hand-delete the second of each definition (the ones
 *       generated by the lexer), as these generated methods modify the input
 *       buffer, which we'll never have to do.
 *   You should also change the declaration/definition of zzBuffer to NOT
 *       be initialized.  This is a needless memory allocation for us since we
 *       will be pointing the array somewhere else anyway.
 *   You should NOT call yylex() on the generated scanner
 *       directly; rather, you should use getTokenList as you would
 *       with any other TokenMaker instance.
 * 
 *
 * @author Robert Futrell
 * @version 0.5
 *
 */
%%

%public
%class XMLTokenMaker
%extends AbstractMarkupTokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{

	/**
	 * Type specific to XMLTokenMaker denoting a line ending with an unclosed
	 * double-quote attribute.
	 */
	public static final int INTERNAL_ATTR_DOUBLE			= -1;


	/**
	 * Type specific to XMLTokenMaker denoting a line ending with an unclosed
	 * single-quote attribute.
	 */
	public static final int INTERNAL_ATTR_SINGLE			= -2;


	/**
	 * Token type specific to XMLTokenMaker denoting a line ending with an
	 * unclosed XML tag; thus a new line is beginning still inside of the tag.
	 */
	public static final int INTERNAL_INTAG					= -3;

	/**
	 * Token type specific to XMLTokenMaker denoting a line ending with an
	 * unclosed DOCTYPE element.
	 */
	public static final int INTERNAL_DTD					= -4;

	/**
	 * Token type specific to XMLTokenMaker denoting a line ending with an
	 * unclosed, locally-defined DTD in a DOCTYPE element.
	 */
	public static final int INTERNAL_DTD_INTERNAL			= -5;

	/**
	 * Token type specific to XMLTokenMaker denoting a line ending with an
	 * unclosed comment.  The state to return to when this comment ends is
	 * embedded in the token type as well.
	 */
	public static final int INTERNAL_IN_XML_COMMENT			= -(1<<11);

	/**
	 * Whether closing markup tags are automatically completed for HTML.
	 */
	private static boolean completeCloseTags;

	/**
	 * Whether the DTD we're currently in is a locally-defined one.  This
	 * field is only valid when in a DOCTYPE element (the  state).
	 */
	private boolean inInternalDtd;

	/**
	 * The state we were in prior to the current one.  This is used to know
	 * what state to resume after an MLC ends.
	 */
	private int prevState;


	/**
	 * Constructor.  This must be here because JFlex does not generate a
	 * no-parameter constructor.
	 */
	public XMLTokenMaker() {
	}


	static {
		completeCloseTags = true;
	}


	/**
	 * Adds the token specified to the current linked list of tokens as an
	 * "end token;" that is, at zzMarkedPos.
	 *
	 * @param tokenType The token's type.
	 */
	private void addEndToken(int tokenType) {
		addToken(zzMarkedPos,zzMarkedPos, tokenType);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 * @see #addToken(int, int, int)
	 */
	private void addHyperlinkToken(int start, int end, int tokenType) {
		int so = start + offsetShift;
		addToken(zzBuffer, start,end, tokenType, so, true);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int tokenType) {
		addToken(zzStartRead, zzMarkedPos-1, tokenType);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int start, int end, int tokenType) {
		int so = start + offsetShift;
		addToken(zzBuffer, start,end, tokenType, so);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param array The character array.
	 * @param start The starting offset in the array.
	 * @param end The ending offset in the array.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which this token
	 *                    occurs.
	 */
	@Override
	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
		super.addToken(array, start,end, tokenType, startOffset);
		zzStartRead = zzMarkedPos;
	}


	protected OccurrenceMarker createOccurrenceMarker() {
		return new XmlOccurrenceMarker();
	}


	/**
	 * Returns whether markup close tags should be completed.  For XML, the
	 * default value is true.
	 *
	 * @return Whether closing markup tags are completed.
	 * @see #setCompleteCloseTags(boolean)
	 */
	@Override
	public boolean getCompleteCloseTags() {
		return completeCloseTags;
	}


	/**
	 * Static version of {@link #getCompleteCloseTags()}.  This hack is
	 * unfortunately needed for applications to be able to query this value
	 * without instantiating this class.
	 *
	 * @return Whether closing markup tags are completed.
	 * @see #setCompleteCloseTags(boolean)
	 */
	public static boolean getCompleteCloseMarkupTags() {
		return completeCloseTags;
	}


	/**
	 * Returns Token.MARKUP_TAG_NAME.
	 *
	 * @param type The token type.
	 * @return Whether tokens of this type should have "mark occurrences"
	 *         enabled.
	 */
	public boolean getMarkOccurrencesOfTokenType(int type) {
		return type==Token.MARKUP_TAG_NAME;
	}


	/**
	 * Returns the first token in the linked list of tokens generated
	 * from text.  This method must be implemented by
	 * subclasses so they can correctly implement syntax highlighting.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with.
	 * @param startOffset The offset into the document at which
	 *        text starts.
	 * @return The first Token in a linked list representing
	 *         the syntax highlighted text.
	 */
	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

		resetTokenList();
		this.offsetShift = -text.offset + startOffset;
		prevState = YYINITIAL;
		inInternalDtd = false;

		// Start off in the proper state.
		int state = YYINITIAL;
		switch (initialTokenType) {
			case Token.MARKUP_COMMENT:
				state = COMMENT;
				break;
			case INTERNAL_DTD:
				state = DTD;
				break;
			case INTERNAL_DTD_INTERNAL:
				state = DTD;
				inInternalDtd = true;
				break;
			case INTERNAL_ATTR_DOUBLE:
				state = INATTR_DOUBLE;
				break;
			case INTERNAL_ATTR_SINGLE:
				state = INATTR_SINGLE;
				break;
			case Token.MARKUP_PROCESSING_INSTRUCTION:
				state = PI;
				break;
			case INTERNAL_INTAG:
				state = INTAG;
				break;
			case Token.MARKUP_CDATA:
				state = CDATA;
				break;
			default:
				if (initialTokenType<-1024) { // INTERNAL_IN_XML_COMMENT - prevState
					int main = -(-initialTokenType & 0xffffff00);
					switch (main) {
						default: // Should never happen
						case INTERNAL_IN_XML_COMMENT:
							state = COMMENT;
							break;
					}
					prevState = -initialTokenType&0xff;
				}
				else { // Shouldn't happen
					state = Token.NULL;
				}
		}

		start = text.offset;
		s = text;
		try {
			yyreset(zzReader);
			yybegin(state);
			return yylex();
		} catch (IOException ioe) {
			ioe.printStackTrace();
			return new TokenImpl();
		}

	}


	/**
	 * Sets whether markup close tags should be completed.
	 *
	 * @param complete Whether closing markup tags are completed.
	 * @see #getCompleteCloseTags()
	 */
	public static void setCompleteCloseTags(boolean complete) {
		completeCloseTags = complete;
	}


	/**
	 * Refills the input buffer.
	 *
	 * @return      true if EOF was reached, otherwise
	 *              false.
	 */
	private boolean zzRefill() {
		return zzCurrentPos>=s.offset+s.count;
	}


	/**
	 * Resets the scanner to read from a new input stream.
	 * Does not close the old reader.
	 *
	 * All internal variables are reset, the old input stream 
	 * cannot be reused (internal buffer is discarded and lost).
	 * Lexical state is set to YY_INITIAL.
	 *
	 * @param reader   the new input stream 
	 */
	public final void yyreset(java.io.Reader reader) {
		// 's' has been updated.
		zzBuffer = s.array;
		/*
		 * We replaced the line below with the two below it because zzRefill
		 * no longer "refills" the buffer (since the way we do it, it's always
		 * "full" the first time through, since it points to the segment's
		 * array).  So, we assign zzEndRead here.
		 */
		//zzStartRead = zzEndRead = s.offset;
		zzStartRead = s.offset;
		zzEndRead = zzStartRead + s.count - 1;
		zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
		zzLexicalState = YYINITIAL;
		zzReader = reader;
		zzAtBOL  = true;
		zzAtEOF  = false;
	}


%}

NameStartChar       = (":" | [A-Z] | "_" | [a-z] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD])
NameChar			= ({NameStartChar} | "-" | "." | [0-9] | #xB7 | [\u0300-\u036F] | [\u203F-\u2040])
TagName				= ({NameStartChar}{NameChar}*)
Whitespace			= ([ \t\f])
LineTerminator			= ([\n])
Identifier			= ([^ \t\n<&]+)
EntityReference		= ([&][^; \t]*[;]?)
InTagIdentifier		= ([^ \t\n\"\'=\/>]+)
CDataBegin			= ("")

URLGenDelim				= ([:\/\?#\[\]@])
URLSubDelim				= ([\!\$&'\(\)\*\+,;=])
URLUnreserved			= ([A-Za-z_0-9\-\.\~])
URLCharacter			= ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters			= ({URLCharacter}*)
URLEndCharacter			= ([\/\$A-Za-z0-9])
URL						= (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)

%state COMMENT
%state PI
%state DTD
%state INTAG
%state INATTR_DOUBLE
%state INATTR_SINGLE
%state CDATA

%%

 {
	""						{ int temp = zzMarkedPos; addToken(start,zzStartRead+2, Token.MARKUP_COMMENT); start = temp; yybegin(prevState); }
	"-"							{}
	{LineTerminator} |
	<>						{ addToken(start,zzStartRead-1, Token.MARKUP_COMMENT); addEndToken(INTERNAL_IN_XML_COMMENT - prevState); return firstToken; }
}

 {
	[^\n\?]+						{}
	{LineTerminator}				{ addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; }
	"?>"							{ yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.MARKUP_PROCESSING_INSTRUCTION); }
	"?"							{}
	<>						{ addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; }
}

 {
	[^\n\[\]<>]+				{}
	"