All downloads are free. Search and download functionality uses the official Maven repository.

to.etc.syntaxer.TokenMarker Maven / Gradle / Ivy

The newest version!
package to.etc.syntaxer;

import gnu.regexp.*;
import javax.swing.text.Segment;
import java.util.*;

/**
 * A token marker splits lines of text into tokens. Each token carries
 * a length field and an identification tag that can be mapped to a color
 * or font style for painting that token.
 *
 * @author Slava Pestov, mike dillon
 * @version $Id: TokenMarker.java,v 1.62 2003/12/27 05:14:46 spestov Exp $
 *
 * @see Token
 * @see TokenHandler
 */
public class TokenMarker
{
	//{{{ TokenMarker constructor
	public TokenMarker()
	{
		m_ruleSets = new Hashtable(64);
	} //}}}

	//{{{ addRuleSet() method
	/**
	 * Registers a rule set under the name it reports through getSetName().
	 * Registering a second rule set with the same name replaces the first.
	 * A rule set named "MAIN" additionally becomes the one returned by
	 * getMainRuleSet().
	 *
	 * @param rules the rule set to register
	 */
	public void addRuleSet(ParserRuleSet rules)
	{
		final String setName = rules.getSetName();
		m_ruleSets.put(setName, rules);

		if(!"MAIN".equals(setName))
		{
			return;
		}

		m_mainRuleSet = rules;
	} //}}}

	//{{{ getMainRuleSet() method
	/**
	 * Returns the rule set that was registered under the name "MAIN".
	 *
	 * @return the main rule set, or null if addRuleSet() has not been
	 * given a rule set with that name
	 */
	public ParserRuleSet getMainRuleSet()
	{
		return this.m_mainRuleSet;
	} //}}}

	//{{{ getRuleSet() method
	/**
	 * Looks up a registered rule set by name.
	 *
	 * @param setName the name the rule set was registered under
	 * @return the matching rule set, or null if no rule set has been
	 * registered under that name
	 */
	public ParserRuleSet getRuleSet(String setName)
	{
		final Object registered = m_ruleSets.get(setName);
		return (ParserRuleSet) registered;
	} //}}}

	//{{{ getRuleSets() method
	/**
	 * Returns all registered rule sets in a newly allocated array.
	 * The element order is whatever the backing table's value view
	 * yields; callers should not rely on it.
	 *
	 * @return an array holding every registered rule set
	 * @since jEdit 4.2pre3
	 */
	public ParserRuleSet[] getRuleSets()
	{
		final Collection registered = m_ruleSets.values();
		final ParserRuleSet[] result = new ParserRuleSet[registered.size()];
		return (ParserRuleSet[]) registered.toArray(result);
	} //}}}

	//{{{ markTokens() method
	/**
	 * Splits one line of text into tokens and reports each token to the
	 * given handler.
	 * Do not call this method directly; call Buffer.markTokens() instead.
	 *
	 * The scan runs over line.array from index line.offset for line.count
	 * characters. Offsets handed to the token handler are relative to
	 * line.offset. After all other tokens, a zero-length Token.END token is
	 * reported and the handler's setLineContext() is called with the
	 * returned context.
	 *
	 * The parsing state is kept in instance fields of this object for the
	 * duration of the call, so overlapping calls on one instance would
	 * interfere with each other.
	 *
	 * @param prevContext the context to resume from (presumably the value
	 * returned for the preceding line -- confirm with callers), or null to
	 * start in the "MAIN" rule set
	 * @param tokenHandler receives the tokens and the final line context
	 * @param line the text of the line to tokenize
	 * @return the interned context in effect at the end of the line
	 * @throws IllegalStateException if prevContext is null and no "MAIN"
	 * rule set has been registered
	 */
	public LineContext markTokens(LineContext prevContext, TokenHandler tokenHandler, Segment line)
	{
		//{{{ Set up some instance variables
		// this is to avoid having to pass around lots and lots of
		// parameters.
		this.m_tokenHandler = tokenHandler;
		this.m_line = line;

		m_lastOffset = line.offset;
		// despite its name this is the exclusive end index into line.array
		m_lineLength = line.count + line.offset;

		m_context = new LineContext();

		if(prevContext == null) {
			// no previous state: start in the "MAIN" rule set
			m_context.rules = getMainRuleSet();
			if(m_context.rules == null)
				throw new IllegalStateException("No context rules??");
		} else {
			// resume from the previous state; the parent chain is shared
			// by reference, only the top-level fields are copied
			m_context.parent = prevContext.parent;
			m_context.inRule = prevContext.inRule;
			m_context.rules = prevContext.rules;
			m_context.spanEndSubst = prevContext.spanEndSubst;
		}

		m_keywords = m_context.rules.getKeywords();
		m_escaped = false;

		m_seenWhitespaceEnd = false;
		m_whitespaceEnd = line.offset;
		//}}}

		//{{{ Main parser loop
		ParserRule rule;
		int terminateChar = m_context.rules.getTerminateChar();
		boolean terminated = false;

		main_loop : for(m_pos = line.offset; m_pos < m_lineLength; m_pos++)
		{
			//{{{ check if we have to stop parsing
			// a non-negative terminateChar is a column limit: on reaching
			// it, push (once per line) a context that uses the standard
			// rule set for the current default token type
			if(terminateChar >= 0 && m_pos - line.offset >= terminateChar && !terminated)
			{
				terminated = true;
				m_context = new LineContext(ParserRuleSet.getStandardRuleSet(m_context.rules.getDefault()), m_context);
				m_keywords = m_context.rules.getKeywords();
			} //}}}

			//{{{ check for end of delegate
			// when the parent context has a rule in progress, that rule's
			// end sequence takes priority over the current rule set
			if(m_context.parent != null)
			{
				rule = m_context.parent.inRule;
				if(rule != null)
				{
					if(checkDelegateEnd(rule))
					{
						m_seenWhitespaceEnd = true;
						continue main_loop;
					}
				}
			} //}}}

			//{{{ check every rule
			char ch = line.array[m_pos];

			// walk the linked list of rules returned for this character
			rule = m_context.rules.getRules(ch);
			while(rule != null)
			{
				// stop checking rules if there was a match
				if(handleRule(rule, false))
				{
					m_seenWhitespaceEnd = true;
					continue main_loop;
				}

				rule = rule.next;
			} //}}}

			//{{{ check if current character is a word separator
			if(Character.isWhitespace(ch))
			{
				// still inside the leading whitespace run: remember the
				// index just past it
				if(!m_seenWhitespaceEnd)
					m_whitespaceEnd = m_pos + 1;

				if(m_context.inRule != null)
					handleRule(m_context.inRule, true);

				handleNoWordBreak();

				markKeyword(false);

				// flush any text pending before the whitespace as a
				// default token ...
				if(m_lastOffset != m_pos)
				{
					tokenHandler.handleToken(line, m_context.rules.getDefault(), m_lastOffset - line.offset, m_pos - m_lastOffset, m_context);
				}

				// ... then the whitespace character itself as a
				// one-character default token
				tokenHandler.handleToken(line, m_context.rules.getDefault(), m_pos - line.offset, 1, m_context);
				m_lastOffset = m_pos + 1;

				m_escaped = false;
			}
			else
			{
				// non-whitespace only breaks words when the rule set has
				// keywords or rules to apply
				if(m_keywords != null || m_context.rules.getRuleCount() != 0)
				{
					String noWordSep = m_context.rules.getNoWordSep();

					// a separator is anything that is neither a letter or
					// digit nor listed in the rule set's noWordSep string
					if(!Character.isLetterOrDigit(ch) && noWordSep.indexOf(ch) == -1)
					{
						if(m_context.inRule != null)
							handleRule(m_context.inRule, true);

						handleNoWordBreak();

						markKeyword(true);

						// NOTE(review): the one-character token is reported
						// at m_lastOffset rather than m_pos; this looks like
						// it relies on markKeyword(true) having advanced
						// m_lastOffset to m_pos -- confirm
						tokenHandler.handleToken(line, m_context.rules.getDefault(), m_lastOffset - line.offset, 1, m_context);
						m_lastOffset = m_pos + 1;
					}
				}

				m_seenWhitespaceEnd = true;
				m_escaped = false;
			} //}}}
		} //}}}

		//{{{ Mark all remaining characters
		m_pos = m_lineLength;

		if(m_context.inRule != null)
			handleRule(m_context.inRule, true);

		handleNoWordBreak();
		markKeyword(true);
		//}}}

		//{{{ Unwind any NO_LINE_BREAK parent delegates
		// pop contexts while the parent's active rule is flagged
		// NO_LINE_BREAK; if the terminate column was reached, every
		// parent is popped regardless of its rule
		unwind : while(m_context.parent != null)
		{
			rule = m_context.parent.inRule;
			if((rule != null && (rule.action & ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK) || terminated)
			{
				m_context = m_context.parent;
				m_keywords = m_context.rules.getKeywords();
				m_context.inRule = null;
			}
			else
				break unwind;
		} //}}}

		// zero-length end-of-line marker
		tokenHandler.handleToken(line, Token.END, m_pos - line.offset, 0, m_context);

		m_context = m_context.intern();
		tokenHandler.setLineContext(m_context);
		return m_context;
	} //}}}

	// registered rule sets, keyed by the name each reports via getSetName()
	private Hashtable m_ruleSets;

	// the rule set registered under the name "MAIN", or null if none was
	private ParserRuleSet m_mainRuleSet;

	// handler receiving tokens for the line currently being marked
	private TokenHandler m_tokenHandler;

	// the line currently being marked
	private Segment m_line;

	// parser context at the current position
	private LineContext m_context;

	// keyword map of the current context's rule set; may be null
	private KeywordMap m_keywords;

	// scratch segment holding the start/end sequence last compared
	// by handleRule()
	private Segment m_pattern = new Segment();

	// index into m_line.array of the first character not yet
	// reported as a token
	private int m_lastOffset;

	// exclusive end index of the line within m_line.array
	// (line.offset + line.count), not a character count
	private int m_lineLength;

	// index into m_line.array of the character being examined
	private int m_pos;

	// toggled when an escape rule matches; cleared when an ordinary
	// character is processed
	private boolean m_escaped;

	// index just past the line's leading whitespace
	private int m_whitespaceEnd;

	// true once a non-whitespace character or a rule match has been seen
	// on the line, which freezes m_whitespaceEnd
	private boolean m_seenWhitespaceEnd;

	/**
	 * Checks whether the rule that is active in the parent context ends
	 * at the current position and, if it does, leaves the delegate rule
	 * set and reports the end sequence as a token.
	 *
	 * When the end sequence does not apply (no match, or the match was
	 * escaped), the parent rule set's escape rule is tried instead unless
	 * the rule carries the NO_ESCAPE flag.
	 *
	 * @param rule the rule in progress in the parent context
	 * @return true if the end sequence or the parent's escape rule
	 * matched at the current position, false otherwise
	 */
	private boolean checkDelegateEnd(ParserRule rule)
	{
		if(rule.end == null)
		{
			return false;
		}

		// try the end sequence with the parent context temporarily
		// active, then switch back to the delegate context
		final LineContext delegateContext = m_context;
		m_context = delegateContext.parent;
		m_keywords = m_context.rules.getKeywords();
		final boolean wasEscaped = m_escaped;
		final boolean endMatched = handleRule(rule, true);
		m_context = delegateContext;
		m_keywords = m_context.rules.getKeywords();

		if(!endMatched || wasEscaped)
		{
			// staying inside the delegate: check escape rule of parent
			if((rule.action & ParserRule.NO_ESCAPE) != 0)
			{
				return false;
			}

			final ParserRule parentEscape = m_context.parent.rules.getEscapeRule();
			return parentEscape != null && handleRule(parentEscape, false);
		}

		// finish whatever is still open inside the delegate
		final ParserRule openRule = m_context.inRule;
		if(openRule != null)
		{
			handleRule(openRule, true);
		}

		markKeyword(true);

		// continue in a copy of the parent context
		m_context = (LineContext) m_context.parent.clone();

		final ParserRule spanRule = m_context.inRule;
		final boolean excludeMatch = (spanRule.action & ParserRule.EXCLUDE_MATCH) == ParserRule.EXCLUDE_MATCH;
		final byte endTokenType = excludeMatch ? m_context.rules.getDefault() : spanRule.token;
		final int matchLength = m_pattern.count;

		m_tokenHandler.handleToken(m_line, endTokenType, m_pos - m_line.offset, matchLength, m_context);

		m_keywords = m_context.rules.getKeywords();
		m_context.inRule = null;
		m_lastOffset = m_pos + matchLength;

		// leave pos on the last character of the match sequence; the
		// caller's loop increment then steps past it
		m_pos = m_lastOffset - 1;

		return true;
	}

	/**
	 * Checks if the rule matches the line at the current position
	 * and handles the rule if it does match
	 */
	private boolean handleRule(ParserRule checkRule, boolean end)
	{
		//{{{ Some rules can only match in certain locations
		if(!end)
		{
			if(Character.toUpperCase(checkRule.hashChar) != Character.toUpperCase(m_line.array[m_pos]))
			{
				return false;
			}
		}

		int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ? m_lastOffset : m_pos;
		int posMatch = (end ? checkRule.endPosMatch : checkRule.startPosMatch);

		if((posMatch & ParserRule.AT_LINE_START) == ParserRule.AT_LINE_START)
		{
			if(offset != m_line.offset)
				return false;
		}
		else if((posMatch & ParserRule.AT_WHITESPACE_END) == ParserRule.AT_WHITESPACE_END)
		{
			if(offset != m_whitespaceEnd)
				return false;
		}
		else if((posMatch & ParserRule.AT_WORD_START) == ParserRule.AT_WORD_START)
		{
			if(offset != m_lastOffset)
				return false;
		} //}}}

		int matchedChars = 1;
		CharIndexedSegment charIndexed = null;
		REMatch match = null;

		//{{{ See if the rule's start or end sequence matches here
		if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
		{
			// the end cannot be a regular expression
			if((checkRule.action & ParserRule.REGEXP) == 0 || end)
			{
				if(end)
				{
					if(m_context.spanEndSubst != null)
						m_pattern.array = m_context.spanEndSubst;
					else
						m_pattern.array = checkRule.end;
				}
				else
					m_pattern.array = checkRule.start;
				m_pattern.offset = 0;
				m_pattern.count = m_pattern.array.length;
				matchedChars = m_pattern.count;

				if(!SyntaxUtilities.regionMatches(m_context.rules.getIgnoreCase(), m_line, m_pos, m_pattern.array))
				{
					return false;
				}
			}
			else
			{
				// note that all regexps start with \A so they only
				// match the start of the string
				int matchStart = m_pos - m_line.offset;
				charIndexed = new CharIndexedSegment(m_line, matchStart);
				match = checkRule.startRegexp.getMatch(charIndexed, 0, RE.REG_ANCHORINDEX);
				if(match == null)
					return false;
				else if(match.getStartIndex() != 0)
					throw new InternalError("Can't happen");
				else
				{
					matchedChars = match.getEndIndex();
					/* workaround for hang if match was
					 * zero-width. not sure if there is
					 * a better way to handle this */
					if(matchedChars == 0)
						matchedChars = 1;
				}
			}
		} //}}}

		//{{{ Check for an escape sequence
		if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
		{
			if(m_context.inRule != null)
				handleRule(m_context.inRule, true);

			m_escaped = !m_escaped;
			m_pos += m_pattern.count - 1;
		}
		else if(m_escaped)
		{
			m_escaped = false;
			m_pos += m_pattern.count - 1;
		} //}}}
		//{{{ Handle start of rule
		else if(!end)
		{
			if(m_context.inRule != null)
				handleRule(m_context.inRule, true);

			markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS) != ParserRule.MARK_PREVIOUS);

			switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
			{
				//{{{ SEQ
				case ParserRule.SEQ:
					m_context.spanEndSubst = null;

					if((checkRule.action & ParserRule.REGEXP) != 0)
					{
						handleTokenWithSpaces(m_tokenHandler, checkRule.token, m_pos - m_line.offset, matchedChars, m_context);
					}
					else
					{
						m_tokenHandler.handleToken(m_line, checkRule.token, m_pos - m_line.offset, matchedChars, m_context);
					}

					// a DELEGATE attribute on a SEQ changes the
					// ruleset from the end of the SEQ onwards
					if(checkRule.delegate != null)
					{
						m_context = new LineContext(checkRule.delegate, m_context.parent);
						m_keywords = m_context.rules.getKeywords();
					}
					break;
				//}}}
				//{{{ SPAN, EOL_SPAN
				case ParserRule.SPAN:
				case ParserRule.EOL_SPAN:
					m_context.inRule = checkRule;

					byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH) == ParserRule.EXCLUDE_MATCH ? m_context.rules.getDefault()
							: checkRule.token);

					if((checkRule.action & ParserRule.REGEXP) != 0)
					{
						handleTokenWithSpaces(m_tokenHandler, tokenType, m_pos - m_line.offset, matchedChars, m_context);
					}
					else
					{
						m_tokenHandler.handleToken(m_line, tokenType, m_pos - m_line.offset, matchedChars, m_context);
					}

					char[] spanEndSubst = null;
					/* substitute result of matching the rule start
					 * into the end string.
					 *
					 * eg, in shell script mode, <<\s*(\w+) is
					 * matched into \<$1\> to construct rules for
					 * highlighting read-ins like this <




© 2015 - 2024 Weber Informatics LLC | Privacy Policy