org.gjt.sp.jedit.syntax.TokenMarker Maven / Gradle / Ivy
This project aims to build a command-line tool that creates an HTML view of source code with syntax highlighting.
It uses the jEdit syntax highlighting engine and supports all languages that jEdit supports,
which are currently: ActionScript, Ada 95, ANTLR, Apache HTTPD, APDL, AppleScript, ASP, Aspect-J, Assembly, AWK, B formal method, Batch, BBj, BCEL, BibTeX, C, C++, C#, CHILL, CIL, COBOL, ColdFusion, CSS, CVS Commit, D, DOxygen, DSSSL, Eiffel, EmbPerl, Erlang, Factor, Fortran, Foxpro, FreeMarker, Gettext, Groovy, Haskell, HTML, Icon, IDL, Inform, INI, Inno Setup, Informix 4GL, Interlis, Io, Java, JavaScript, JCL, JHTML, JMK, JSP, Latex, Lilypond, Lisp, LOTOS, Lua, Makefile, Maple, ML, Modula-3, MoinMoin, MQSC, NetRexx, NQC, NSIS2, Objective C, ObjectRexx, Occam, Omnimark, Parrot, Pascal, Patch, Perl, PHP, Pike, PL-SQL, PL/I, Pop11, PostScript, Povray, PowerDynamo, Progress 4GL, Prolog, Properties, PSP, PV-WAVE, Pyrex, Python, REBOL, Redcode, Relax-NG, RelationalView, Rest, Rib, RPM spec, RTF, Ruby, Ruby-HTML, RView, S+, S#, SAS, Scheme, SDL/PL, SGML, Shell Script, SHTML, Smalltalk, SMI MIB, SQR, Squidconf, SVN Commit, Swig, TCL, TeX, Texinfo, TPL, Transact-SQL, UnrealScript, VBScript, Velocity, Verilog, VHDL, XML, XSL, ZPT
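TokenMarker (the class listed below) is driven one line at a time: each call to markTokens() receives the LineContext returned for the previous line and reports tokens through a TokenHandler. The following sketch is illustrative only; it assumes a TokenMarker that has already been populated with rule sets (in jEdit this is done by the mode loader, which is not shown) and that the TokenHandler interface declares exactly the two callbacks invoked in the source below, handleToken() and setLineContext(). The HtmlTokenHandler and HighlightDriver names are made up for the example.

import javax.swing.text.Segment;
import org.gjt.sp.jedit.syntax.Token;
import org.gjt.sp.jedit.syntax.TokenHandler;
import org.gjt.sp.jedit.syntax.TokenMarker;

// Hypothetical handler that records which token type covers each run of text.
class HtmlTokenHandler implements TokenHandler
{
	public void handleToken(Segment seg, byte id, int offset, int length,
		TokenMarker.LineContext context)
	{
		if(id == Token.END || length == 0)
			return;
		// offsets reported by markTokens() are relative to the start of the line
		String text = new String(seg.array, seg.offset + offset, length);
		// map 'id' to a CSS class or color when emitting HTML
		System.out.println(id + " -> \"" + text + "\"");
	}

	public void setLineContext(TokenMarker.LineContext lineContext)
	{
		// called once per line with the interned end-of-line context
	}
}

class HighlightDriver
{
	static void highlight(TokenMarker marker, String[] lines)
	{
		HtmlTokenHandler handler = new HtmlTokenHandler();
		TokenMarker.LineContext prev = null;
		for(String line : lines)
		{
			char[] chars = line.toCharArray();
			// the returned context carries multi-line state (open comments,
			// here-documents, ...) forward to the next line
			prev = marker.markTokens(prev, handler,
				new Segment(chars, 0, chars.length));
		}
	}
}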
/*
* TokenMarker.java - Tokenizes lines of text
* :tabSize=8:indentSize=8:noTabs=false:
* :folding=explicit:collapseFolds=1:
*
* Copyright (C) 1998, 2003 Slava Pestov
* Copyright (C) 1999, 2000 mike dillon
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.gjt.sp.jedit.syntax;
//{{{ Imports
import javax.swing.text.Segment;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gjt.sp.jedit.TextUtilities;
import org.gjt.sp.util.SegmentCharSequence;
import org.gjt.sp.util.StandardUtilities;
//}}}
/**
* A token marker splits lines of text into tokens. Each token carries
* a length field and an identification tag that can be mapped to a color
* or font style for painting that token.
*
* @author Slava Pestov, mike dillon
* @version $Id: TokenMarker.java 12504 2008-04-22 23:12:43Z ezust $
*
* @see org.gjt.sp.jedit.syntax.Token
* @see org.gjt.sp.jedit.syntax.TokenHandler
*/
public class TokenMarker
{
//{{{ TokenMarker constructor
public TokenMarker()
{} //}}}
//{{{ addRuleSet() method
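/**
 * Adds a rule set to this token marker. The rule set whose name is
 * "MAIN" also becomes the main rule set returned by getMainRuleSet().
 */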
public void addRuleSet(ParserRuleSet rules)
{
ruleSets.put(rules.getSetName(), rules);
if (rules.getSetName().equals("MAIN"))
mainRuleSet = rules;
} //}}}
//{{{ getMainRuleSet() method
public ParserRuleSet getMainRuleSet()
{
return mainRuleSet;
} //}}}
//{{{ getRuleSet() method
public ParserRuleSet getRuleSet(String setName)
{
return ruleSets.get(setName);
} //}}}
//{{{ getRuleSets() method
/**
* @since jEdit 4.2pre3
*/
public ParserRuleSet[] getRuleSets()
{
return ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]);
} //}}}
//{{{ markTokens() method
/**
* Do not call this method directly; call Buffer.markTokens() instead.
*
* @param prevContext the context of the previous line; may be null
* @param tokenHandler the token handler
* @param line a segment containing the content of the line
*/
public LineContext markTokens(LineContext prevContext,
TokenHandler tokenHandler, Segment line)
{
//{{{ Set up some instance variables
// this is to avoid having to pass around lots and lots of
// parameters.
this.tokenHandler = tokenHandler;
this.line = line;
lastOffset = line.offset;
lineLength = line.count + line.offset;
context = new LineContext();
if(prevContext == null)
{
context.rules = getMainRuleSet();
context.escapeRule = context.rules.getEscapeRule();
}
else
{
context.parent = prevContext.parent;
context.setInRule(prevContext.inRule);
context.rules = prevContext.rules;
context.spanEndSubst = prevContext.spanEndSubst;
}
keywords = context.rules.getKeywords();
seenWhitespaceEnd = false;
whitespaceEnd = line.offset;
//}}}
//{{{ Main parser loop
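// Each character is handled in this order: the escape rule first, then a
// check whether the parent delegate's span ends here, then every rule keyed
// on the character, and finally whitespace / word-separator handling so that
// keywords and default text get marked.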
int terminateChar = context.rules.getTerminateChar();
boolean terminated = false;
main_loop: for(pos = line.offset; pos < lineLength; pos++)
{
//{{{ check if we have to stop parsing (happens if the terminateChar has been exceeded)
if(terminateChar >= 0 && pos - line.offset >= terminateChar
&& !terminated)
{
terminated = true;
context = new LineContext(ParserRuleSet
.getStandardRuleSet(context.rules
.getDefault()),context);
keywords = context.rules.getKeywords();
} //}}}
//{{{ Check for the escape rule before anything else.
if (context.escapeRule != null &&
handleRule(context.escapeRule,false))
{
continue main_loop;
} //}}}
//{{{ check for end of delegate
if (context.parent != null
&& context.parent.inRule != null
&& checkDelegateEnd(context.parent.inRule))
{
seenWhitespaceEnd = true;
continue main_loop;
} //}}}
//{{{ check every rule
Character ch = Character.valueOf(line.array[pos]);
List<ParserRule> rules = context.rules.getRules(ch);
for (ParserRule rule : rules)
{
// stop checking rules if there was a match
if (handleRule(rule,false))
{
seenWhitespaceEnd = true;
continue main_loop;
}
} //}}}
//{{{ check if current character is a word separator
if(Character.isWhitespace(ch))
{
if(!seenWhitespaceEnd)
whitespaceEnd = pos + 1;
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(false);
if(lastOffset != pos)
{
tokenHandler.handleToken(line,
context.rules.getDefault(),
lastOffset - line.offset,
pos - lastOffset,
context);
}
tokenHandler.handleToken(line,
context.rules.getDefault(),
pos - line.offset,1,context);
lastOffset = pos + 1;
}
else
{
if(keywords != null || context.rules.getRuleCount() != 0)
{
String noWordSep = context.rules.getNoWordSep();
if(!Character.isLetterOrDigit(ch)
&& noWordSep.indexOf(ch) == -1)
{
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(true);
tokenHandler.handleToken(line,
context.rules.getDefault(),
lastOffset - line.offset,1,
context);
lastOffset = pos + 1;
}
}
seenWhitespaceEnd = true;
} //}}}
} //}}}
//{{{ Mark all remaining characters
pos = lineLength;
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(true);
//}}}
//{{{ Unwind any NO_LINE_BREAK parent delegates
unwind: while(context.parent != null)
{
ParserRule rule = context.parent.inRule;
if((rule != null && (rule.action
& ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
|| terminated)
{
context = context.parent;
keywords = context.rules.getKeywords();
context.setInRule(null);
}
else
break unwind;
} //}}}
tokenHandler.handleToken(line,Token.END,
pos - line.offset,0,context);
context = context.intern();
tokenHandler.setLineContext(context);
/* for GC. */
this.line = null;
return context;
} //}}}
//{{{ Private members
//{{{ Instance variables
private final Map<String, ParserRuleSet> ruleSets = new Hashtable<String, ParserRuleSet>(64);
private ParserRuleSet mainRuleSet;
// Instead of passing these around to each method, we just store them
// as instance variables. Note that this is not thread-safe.
private TokenHandler tokenHandler;
/** The line from which we will mark the tokens. */
private Segment line;
/** The context of the current line. */
private LineContext context;
private KeywordMap keywords;
private final Segment pattern = new Segment();
private int lastOffset;
private int lineLength;
private int pos;
private int whitespaceEnd;
private boolean seenWhitespaceEnd;
//}}}
//{{{ checkDelegateEnd() method
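/**
 * Checks whether the end of the parent delegate's rule matches at the
 * current position. If it does, the delegate is popped: the matched end
 * sequence is reported as a token and parsing resumes in the parent
 * rule set.
 */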
private boolean checkDelegateEnd(ParserRule rule)
{
if(rule.end == null)
return false;
LineContext tempContext = context;
context = context.parent;
keywords = context.rules.getKeywords();
boolean handled = handleRule(rule,true);
context = tempContext;
keywords = context.rules.getKeywords();
if (handled)
{
if(context.inRule != null)
handleRule(context.inRule,true);
markKeyword(true);
context = (LineContext)context.parent.clone();
tokenHandler.handleToken(line,
matchToken(context.inRule, context.inRule, context),
pos - line.offset,pattern.count,context);
keywords = context.rules.getKeywords();
context.setInRule(null);
lastOffset = pos + pattern.count;
// move pos to last character of match sequence
pos += pattern.count - 1;
return true;
}
return false;
} //}}}
//{{{ handleRule() method
/**
* Checks if the rule matches the line at the current position
* and handles the rule if it does match
*/
private boolean handleRule(ParserRule checkRule, boolean end)
{
//{{{ Some rules can only match in certain locations
if(!end)
{
if (null == checkRule.upHashChars)
{
if (checkRule.upHashChar != null &&
(pos + checkRule.upHashChar.length() < line.array.length) &&
!checkHashString(checkRule))
{
return false;
}
}
else
{
if (-1 == Arrays.binarySearch(
checkRule.upHashChars,
Character.toUpperCase(line.array[pos])))
{
return false;
}
}
}
int offset = (checkRule.action & ParserRule.MARK_PREVIOUS) != 0 ? lastOffset : pos;
int posMatch = end ? checkRule.endPosMatch : checkRule.startPosMatch;
if((posMatch & ParserRule.AT_LINE_START)
== ParserRule.AT_LINE_START)
{
if(offset != line.offset)
{
return false;
}
}
else if((posMatch & ParserRule.AT_WHITESPACE_END)
== ParserRule.AT_WHITESPACE_END)
{
if(offset != whitespaceEnd)
{
return false;
}
}
else if((posMatch & ParserRule.AT_WORD_START)
== ParserRule.AT_WORD_START)
{
if(offset != lastOffset)
{
return false;
}
} //}}}
int matchedChars = 1;
CharSequence charSeq = null;
Matcher match = null;
//{{{ See if the rule's start or end sequence matches here
if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
{
// the end cannot be a regular expression
if((checkRule.action & ParserRule.REGEXP) == 0 || end)
{
if(end)
{
if(context.spanEndSubst != null)
pattern.array = context.spanEndSubst;
else
pattern.array = checkRule.end;
}
else
pattern.array = checkRule.start;
pattern.offset = 0;
pattern.count = pattern.array.length;
matchedChars = pattern.count;
if(!SyntaxUtilities.regionMatches(context.rules
.getIgnoreCase(),line,pos,pattern.array))
{
return false;
}
}
else
{
// note that all regexps start with \A so they only
// match the start of the string
//int matchStart = pos - line.offset;
charSeq = new SegmentCharSequence(line, pos - line.offset,
line.count - (pos - line.offset));
match = checkRule.startRegexp.matcher(charSeq);
if(!match.lookingAt())
{
return false;
}
else if(match.start() != 0)
{
throw new InternalError("Can't happen");
}
else
{
matchedChars = match.end();
/* workaround for hang if match was
* zero-width. not sure if there is
* a better way to handle this */
if(matchedChars == 0)
matchedChars = 1;
}
}
} //}}}
//{{{ Check for an escape sequence
if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
{
pos += pattern.count;
} //}}}
//{{{ Handle start of rule
else if(!end)
{
if(context.inRule != null)
handleRule(context.inRule,true);
markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
!= ParserRule.MARK_PREVIOUS);
switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
{
//{{{ SEQ
case ParserRule.SEQ:
context.spanEndSubst = null;
if((checkRule.action & ParserRule.REGEXP) != 0)
{
handleTokenWithSpaces(tokenHandler,
checkRule.token,
pos - line.offset,
matchedChars,
context);
}
else
{
tokenHandler.handleToken(line,
checkRule.token,
pos - line.offset,
matchedChars,context);
}
// a DELEGATE attribute on a SEQ changes the
// ruleset from the end of the SEQ onwards
if(checkRule.delegate != null)
{
context = new LineContext(
checkRule.delegate,
context.parent);
keywords = context.rules.getKeywords();
}
break;
//}}}
//{{{ SPAN, EOL_SPAN
case ParserRule.SPAN:
case ParserRule.EOL_SPAN:
context.setInRule(checkRule);
byte tokenType = matchToken(checkRule,
context.inRule, context);
if((checkRule.action & ParserRule.REGEXP) != 0)
{
handleTokenWithSpaces(tokenHandler,
tokenType,
pos - line.offset,
matchedChars,
context);
}
else
{
tokenHandler.handleToken(line,tokenType,
pos - line.offset,
matchedChars,context);
}
char[] spanEndSubst = null;
/* substitute result of matching the rule start
* into the end string.
*
* eg, in shell script mode, <<\s*(\w+) is
* matched into \<$1\> to construct rules for
* highlighting read-ins like this <<EOF
* ...
* EOF
*/
// [the remainder of handleRule() and the other private helper methods of
// TokenMarker are missing from this listing]
//{{{ LineContext class
/**
 * Stores persistent per-line syntax parser state.
 */
public static class LineContext
{
private static final Map<LineContext, LineContext> intern = new HashMap<LineContext, LineContext>();
public LineContext parent;
public ParserRule inRule;
public ParserRuleSet rules;
// used for SPAN_REGEXP rules; otherwise null
public char[] spanEndSubst;
public ParserRule escapeRule;
//{{{ LineContext constructor
public LineContext(ParserRuleSet rs, LineContext lc)
{
rules = rs;
parent = (lc == null ? null : (LineContext)lc.clone());
/*
* SPANs with no delegate need to propagate the
* escape rule to the child context, so this is
* needed.
*/
if (rs.getModeName() != null)
escapeRule = rules.getEscapeRule();
else
escapeRule = lc.escapeRule;
} //}}}
//{{{ LineContext constructor
public LineContext()
{
} //}}}
//{{{ intern() method
public LineContext intern()
{
LineContext obj = intern.get(this);
if(obj == null)
{
intern.put(this,this);
return this;
}
else
return obj;
} //}}}
//{{{ hashCode() method
public int hashCode()
{
if(inRule != null)
return inRule.hashCode();
else if(rules != null)
return rules.hashCode();
else
return 0;
} //}}}
//{{{ equals() method
public boolean equals(Object obj)
{
if(obj instanceof LineContext)
{
LineContext lc = (LineContext)obj;
return lc.inRule == inRule && lc.rules == rules
&& StandardUtilities.objectsEqual(parent,lc.parent)
&& charArraysEqual(spanEndSubst,lc.spanEndSubst);
}
else
return false;
} //}}}
//{{{ clone() method
public Object clone()
{
LineContext lc = new LineContext();
lc.inRule = inRule;
lc.rules = rules;
lc.parent = (parent == null) ? null : (LineContext) parent.clone();
lc.spanEndSubst = spanEndSubst;
lc.escapeRule = escapeRule;
return lc;
} //}}}
//{{{ charArraysEqual() method
private static boolean charArraysEqual(char[] c1, char[] c2)
{
if(c1 == null)
return c2 == null;
// c1 is not null
if(c2 == null)
return false;
if(c1.length != c2.length)
return false;
for(int i = 0; i < c1.length; i++)
{
if(c1[i] != c2[i])
return false;
}
return true;
} //}}}
//{{{ setInRule() method
/**
* Sets the current rule being processed and adjusts the
* escape rule for the context based on the rule.
*/
public void setInRule(ParserRule rule)
{
inRule = rule;
if (rule != null && rule.escapeRule != null)
escapeRule = rule.escapeRule;
else if (rules != null && rules.getName() != null)
escapeRule = rules.getEscapeRule();
else if (parent != null)
escapeRule = parent.escapeRule;
else
escapeRule = null;
} //}}}
} //}}}
}
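A note on LineContext.intern(): markTokens() interns the context it returns, so two lines that end in the same parser state hand back the very same object. A caller that caches per-line contexts can therefore compare them by reference to decide when an edit has stopped affecting the tokenization of the following lines. The sketch below illustrates that pattern under the same assumptions as the example above; the cache array and the IncrementalRetokenizer class are hypothetical, not part of jEdit.

import javax.swing.text.Segment;
import org.gjt.sp.jedit.syntax.TokenHandler;
import org.gjt.sp.jedit.syntax.TokenMarker;

class IncrementalRetokenizer
{
	// Re-tokenizes from 'firstDirtyLine' and stops as soon as a line ends in
	// the same (interned) context as before, because every later line would
	// then tokenize identically.
	static void retokenizeFrom(TokenMarker marker, TokenHandler handler,
		String[] lines, TokenMarker.LineContext[] cache, int firstDirtyLine)
	{
		TokenMarker.LineContext prev =
			firstDirtyLine == 0 ? null : cache[firstDirtyLine - 1];
		for(int i = firstDirtyLine; i < lines.length; i++)
		{
			char[] chars = lines[i].toCharArray();
			TokenMarker.LineContext ctx = marker.markTokens(prev,
				handler, new Segment(chars, 0, chars.length));
			// interning makes this reference comparison equivalent to equals()
			if(ctx == cache[i])
				return; // later lines are unaffected
			cache[i] = ctx;
			prev = ctx;
		}
	}
}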