
// com.twineworks.kettle.ruby.step.RubyStepSyntaxHighlighter Maven / Gradle / Ivy
/*
* Ruby for pentaho kettle
* Copyright (C) 2017 Twineworks GmbH
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.twineworks.kettle.ruby.step;
import org.apache.commons.lang.ArrayUtils;
import org.eclipse.swt.SWT;
import org.eclipse.swt.custom.StyleRange;
import org.eclipse.swt.custom.StyledText;
import org.eclipse.swt.graphics.Color;
import org.eclipse.swt.graphics.RGB;
import org.eclipse.swt.widgets.Display;
import org.jcodings.specific.UTF8Encoding;
import org.jruby.Ruby;
import org.jruby.common.NullWarnings;
import org.jruby.lexer.ByteListLexerSource;
import org.jruby.lexer.LexerSource;
import org.jruby.lexer.LexingCommon;
import org.jruby.lexer.yacc.RubyLexer;
import org.jruby.lexer.yacc.SyntaxException;
import org.jruby.parser.ParserConfiguration;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.RubyParser;
import org.jruby.parser.RubyParserResult;
import org.jruby.util.ByteList;
import org.pentaho.di.ui.core.widget.StyledTextComp;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.SortedSet;
import java.util.TreeSet;
public class RubyStepSyntaxHighlighter {
private final int TOKEN_COMMENT = -100;
private final int COLOR_BLACK = 0;
private final int COLOR_GREENISH = 1;
private final int COLOR_VIOLET = 2;
private final int COLOR_BLUE = 3;
private final int COLOR_BLUE_GREEN = 4;
private final int COLOR_YELLOW = 5;
private final int COLOR_RED = 6;
private final int COLOR_GRAY = 7;
private final int COLOR_ORANGE = 8;
private final int STYLE_DEFAULT = 0;
private final int STYLE_STRING = 1;
private final int STYLE_SYMBOL = 2;
private final int STYLE_KEYWORD = 3;
private final int STYLE_GLOBAL_FUNCTION = 4;
private final int STYLE_STANDARD_METHOD = 5;
private final int STYLE_LITERAL_BOUNDARY = 6;
private final int STYLE_COMMENT = 7;
private final int STYLE_CONSTANT = 8;
private final int STYLE_VARIABLE = 9;
private final int STYLE_NUMBER = 10;
private final StyleRange[] styles;
private RubyLexer lexer;
private ParserSupport parserSupport;
private byte[] utf8Bytes;
private int[] codePointIndex;
private String[] STANDARD_GLOBAL_FUNCTIONS = {"abort", "autoload", "autoload?", "binding", "block_given?", "callcc", "caller", "chomp", "chomp!", "chop",
"chop!", "evel", "exec", "exit", "exit!", "fail", "fork", "format", "getc", "gets", "gsub", "gsub!", "iterator?", "load", "open", "p", "print", "printf", "putc", "puts", "rand",
"readline", "readlines", "scan", "select", "sleep", "split", "sprintf", "srand", "sub", "sub!", "syscall", "system", "test", "trap", "warn"
};
SortedSet GLOBAL_FUNCTIONS_SET = new TreeSet<>(Arrays.asList(STANDARD_GLOBAL_FUNCTIONS));
private String[] STANDARD_METHODS = {"allocate", "clone", "display", "dup", "enum_for", "eql?", "equal?", "extend", "freeze", "frozen?", "hash", "id", "inherited", "inspect", "instance_of?", "is_a?",
"kind_of?", "method", "methods", "new", "nil?", "object_id", "respond_to?", "send", "superclass", "taint", "tainted?", "to_a", "to_enum", "to_s", "untaint"
};
SortedSet STANDARD_METHODS_SET = new TreeSet<>(Arrays.asList(STANDARD_METHODS));
private String[] PSEUDO_KEYWORDS = {"at_exit", "attr", "attr_accessor", "attr_reader", "attr_writer", "include", "lambda", "load", "proc", "loop", "private", "protected", "public", "raise", "catch",
"java_import", "require", "import", "include_package"
};
SortedSet PSEUDO_KEYWORDS_SET = new TreeSet<>(Arrays.asList(PSEUDO_KEYWORDS));
private String script;
RubyStepSyntaxHighlighter() {
// -- the colors to use --
Display display = Display.getDefault();
Color[] colors = new Color[]{
new Color(display, new RGB(0, 0, 0)), // Black
new Color(display, new RGB(63, 127, 95)), // Greenish
new Color(display, new RGB(137, 89, 168)), // Violet
new Color(display, new RGB(66, 113, 174)), // Blue
new Color(display, new RGB(62, 153, 159)), // BlueGreen
new Color(display, new RGB(234, 183, 0)), // Yellow
new Color(display, new RGB(200, 40, 41)), // Red
new Color(display, new RGB(142, 144, 140)), // Gray
new Color(display, new RGB(245, 135, 31)) // Orange
};
styles = new StyleRange[]{
new StyleRange(0, 0, null, null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_GREENISH], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_GREENISH], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_VIOLET], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_BLUE], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_BLUE], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_BLUE_GREEN], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_YELLOW], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_RED], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_ORANGE], null, SWT.NORMAL)
};
}
private StyleRange tokenToStyleRange(int token, Object value, int previousToken) {
// determine keyword style up front
if (token >= RubyParser.keyword_class && token <= RubyParser.keyword_do_lambda) {
return styles[STYLE_KEYWORD];
}
switch (token) {
case TOKEN_COMMENT:
return styles[STYLE_COMMENT];
case RubyParser.tINTEGER:
case RubyParser.tFLOAT:
case RubyParser.tRATIONAL:
return styles[STYLE_NUMBER];
case RubyParser.tSTRING_BEG:
case RubyParser.tSTRING_CONTENT:
case RubyParser.tSTRING_END:
case RubyParser.tSTRING_DBEG:
case RubyParser.tSTRING_DVAR:
return styles[STYLE_STRING];
case RubyParser.tCONSTANT:
return styles[STYLE_CONSTANT];
case RubyParser.tGVAR:
case RubyParser.tIVAR:
return styles[STYLE_VARIABLE];
case RubyParser.tREGEXP_BEG:
case RubyParser.tREGEXP_END:
case RubyParser.tPIPE:
return styles[STYLE_LITERAL_BOUNDARY];
case RubyParser.tSYMBEG:
return styles[STYLE_SYMBOL];
case RubyParser.tIDENTIFIER:
if (previousToken == RubyParser.tSYMBEG) {
return styles[STYLE_SYMBOL];
}
// fall through
case RubyParser.tFID:
if (value != null && PSEUDO_KEYWORDS_SET.contains(value.toString())) {
return styles[STYLE_KEYWORD];
}
if (value != null && STANDARD_METHODS_SET.contains(value.toString())) {
return styles[STYLE_STANDARD_METHOD];
}
if (value != null && GLOBAL_FUNCTIONS_SET.contains(value.toString())) {
return styles[STYLE_GLOBAL_FUNCTION];
}
// fall through
default:
return styles[STYLE_DEFAULT];
}
}
private int totalUtf8Bytes(byte initialByte) {
int i = initialByte;
if ((i & 0x000000F0) == 0x000000F0) {
return 4;
}
if ((i & 0x000000E0) == 0x000000E0) {
return 3;
}
if ((i & 0x000000C0) == 0x000000C0) {
return 2;
}
return 1;
}
private void initBytes() {
utf8Bytes = script.getBytes(StandardCharsets.UTF_8);
codePointIndex = new int[utf8Bytes.length + 1];
int c = 0;
for (int i = 0; i < utf8Bytes.length; i++) {
byte utf8Byte = utf8Bytes[i];
codePointIndex[i] = c;
int eatBytes = totalUtf8Bytes(utf8Byte) - 1;
for (int j = 0; j < eatBytes; j++) {
codePointIndex[++i] = c;
}
c += 1;
}
codePointIndex[utf8Bytes.length] = c;
}
private int charOffset(int byteOffset) {
if (byteOffset >= codePointIndex.length) return script.length();
return script.offsetByCodePoints(0, codePointIndex[byteOffset]);
}
private void initLexer(String title) {
LexerSource lexerSource = new ByteListLexerSource(title, 0, new ByteList(utf8Bytes), null);
lexerSource.setEncoding(UTF8Encoding.INSTANCE);
ParserSupport parserSupport = new ParserSupport();
lexer = new RubyLexer(parserSupport, lexerSource, new NullWarnings(Ruby.getGlobalRuntime()));
parserSupport.setLexer(lexer);
parserSupport.setConfiguration(new ParserConfiguration(Ruby.getGlobalRuntime(), 0, false, true, false));
parserSupport.setResult(new RubyParserResult());
parserSupport.setWarnings(new NullWarnings(Ruby.getGlobalRuntime()));
parserSupport.initTopLocalVariables();
lexer.setState(LexingCommon.EXPR_BEG);
}
private int lexerOffset() {
int p = lexer.lex_p;
int pend = lexer.lex_pend;
int lo = lexer.getLineOffset();
int ret = lexer.eofp ? lo : p + lo;
return ret;
}
void highlight(String title, StyledTextComp wText) {
// set up lexer process
script = wText.getText();
StyledText canvas = wText.getStyledText();
initBytes();
initLexer(title);
// remember bounds of current token
int leftTokenBorder = 0;
int rightTokenBorder = 0;
int token = 0;
int previousToken = 0;
int lastCommentEnd = 0;
ArrayList ranges = new ArrayList<>(200);
ArrayList intRanges = new ArrayList<>(400);
try {
boolean keepParsing = true;
while (keepParsing) {
/* take care of comments, which are stripped out by the lexer */
int[] upcomingComment = null;
while ((rightTokenBorder >= lastCommentEnd || rightTokenBorder == 0) && (upcomingComment = getUpcomingCommentPos(rightTokenBorder)) != null) {
leftTokenBorder = upcomingComment[0];
rightTokenBorder = leftTokenBorder + upcomingComment[1];
lastCommentEnd = rightTokenBorder;
// System.out.println("Found comment -> [" + leftTokenBorder + "," + rightTokenBorder + "]");
ranges.add(tokenToStyleRange(TOKEN_COMMENT, null, previousToken));
int start = charOffset(leftTokenBorder);
int count = charOffset(rightTokenBorder) - start;
intRanges.add(start);
intRanges.add(count);
}
/* read language syntax */
int oldOffset = lexerOffset();
previousToken = token;
token = lexer.nextToken();
keepParsing = !lexer.eofp;
if (token > 0 && token != 10) {
Object v = lexer.value();
leftTokenBorder = oldOffset;
if (leftTokenBorder < lastCommentEnd && lexerOffset() > lastCommentEnd) {
leftTokenBorder = lastCommentEnd;
}
rightTokenBorder = lexerOffset();
// System.out.println("Found token " + token + " -> " + lexer.value() + " [" + leftTokenBorder + "," + rightTokenBorder + "]");
ranges.add(tokenToStyleRange(token, v, previousToken));
int start = charOffset(leftTokenBorder);
int count = charOffset(rightTokenBorder) - start;
intRanges.add(start);
intRanges.add(count);
}
}
// don't mind anything that might go wrong during parsing
} catch (SyntaxException e) {
// apply the latest style to the rest of the file in case there is a syntax error
if (ranges.size() > 0) {
ranges.remove(ranges.size() - 1);
intRanges.remove(intRanges.size() - 1);
intRanges.remove(intRanges.size() - 1);
}
ranges.add(tokenToStyleRange(token, null, previousToken));
int start = charOffset(leftTokenBorder);
intRanges.add(start);
intRanges.add(script.length() - start);
} catch (Exception ignored) {
// the lexer will sometimes throw a non-syntax exception when confronted with malformed input
// ignored.printStackTrace();
}
// don't mind swt errors in case some unforeseen input brought the style ranges out of order
try {
canvas.setStyleRanges(ArrayUtils.toPrimitive(intRanges.toArray(new Integer[0])), ranges.toArray(new StyleRange[0]));
} catch (Exception e) {
// e.printStackTrace();
}
}
// returns position and length pair of a comment that starts at this position (forwarding through whitespace)
// returns null if there's no comment coming up
private int[] getUpcomingCommentPos(int pos) {
// if we're in the middle of a string or regex, there's no comments
if (lexer.getStrTerm() != null)
return null;
// looking for next comment while ignoring whitespace
boolean searchingComment = true;
boolean isComment = false;
int idx = pos;
do {
if (idx >= utf8Bytes.length) {
searchingComment = false;
break;
}
switch (utf8Bytes[idx]) {
case '\t':
case ' ':
case '\n':
case '\r':
idx += 1;
break;
case '#':
isComment = true;
searchingComment = false;
break;
default:
searchingComment = false;
}
} while (searchingComment);
if (isComment) {
// now to determine it's length, just scan up to \n or EOF
int end = idx;
boolean foundEnd = false;
do {
end += 1;
if (end >= utf8Bytes.length) {
foundEnd = true;
break;
}
switch (utf8Bytes[end]) {
case '\n':
foundEnd = true;
}
} while (!foundEnd);
return new int[]{idx, end - idx};
} else {
return null;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy