com.jfinal.template.stat.Lexer Maven / Gradle / Ivy
The newest version!
/**
* Copyright (c) 2011-2023, James Zhan 詹波 ([email protected]).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.jfinal.template.stat;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
* DKFF(Dynamic Key Feature Forward) Lexer
*/
class Lexer {
static final char EOF = (char)-1;
static final int TEXT_STATE_DIAGRAM = 999;
char[] buf;
int state = 0;
int lexemeBegin = 0;
int forward = 0;
int beginRow = 1;
int forwardRow = 1;
TextToken previousTextToken = null;
String fileName;
Set keepLineBlankDirectives;
List tokens = new ArrayList();
public Lexer(StringBuilder content, String fileName, Set keepLineBlankDirectives) {
this.keepLineBlankDirectives = keepLineBlankDirectives;
int len = content.length();
buf = new char[len + 1];
content.getChars(0, content.length(), buf, 0);
buf[len] = EOF;
this.fileName = fileName;
}
/**
* 进入每个扫描方法之前 peek() 处于可用状态,不需要 next()
* 每个扫描方法内部是否要 next() 移动,取决定具体情况
* 每个扫描方法成功返回前,将 forward 置于下一次扫描需要处理的地方
* 让下个扫描方法不必 next()
* 紧靠 scanText() 之前的扫描方法在失败后必须保持住forward
* 这是 scanText() 可以一直向前的保障
*/
public List scan() {
while (peek() != EOF) {
if (peek() == '#') {
if (scanDire()) {
continue ;
}
if (scanSingleLineComment()) {
continue ;
}
if (scanMultiLineComment()) {
continue ;
}
if (scanNoParse()) {
continue ;
}
}
scanText();
}
return tokens;
}
/**
* 指令模式与解析规则
* 1:指令 pattern
* #(p)
* #id(p)
* #define id(p)
* #@id(p) / #@id?(p)
* #else / #end
*
* 2:关键字类型指令在获取到关键字以后,必须要正确解析出后续内容,否则抛异常
* 2020-02-28: 该规则改为与 "非关键字指令" 一样
*
* 3:非关键字类型指令只有在本行内出现 # id ( 三个序列以后,才要求正确解析出后续内容
* 否则当成普通文本
*/
boolean scanDire() {
String id = null;
StringBuilder para = null;
Token idToken = null;
Token paraToken = null;
while (true) {
switch (state) {
case 0:
if (peek() == '#') { // #
next();
skipBlanks();
state = 1;
continue ;
}
return fail();
case 1:
if (peek() == '(') { // # (
para = scanPara("");
idToken = new Token(Symbol.OUTPUT, beginRow);
paraToken = new ParaToken(para, beginRow);
return addIdParaToken(idToken, paraToken);
}
if (CharTable.isLetter(peek())) { // # id
state = 10;
continue ;
}
if (peek() == '@') { // # @
next();
skipBlanks();
if (CharTable.isLetter(peek())) { // # @ id
state = 20;
continue ;
}
}
return fail();
// -----------------------------------------------------
case 10: // # id
id = scanId();
Symbol symbol = Symbol.getKeywordSym(id);
// 非关键字指令
if (symbol == null) {
state = 11;
continue ;
}
// define 指令
if (symbol == Symbol.DEFINE) {
state = 12;
continue ;
}
// 在支持 #seleif 的基础上,支持 #else if
if (symbol == Symbol.ELSE) {
if (foundFollowingIf()) {
id = "else if";
symbol = Symbol.ELSEIF;
}
}
// 无参关键字指令
if (symbol.noPara()) {
return addNoParaToken(new Token(symbol, id, beginRow));
}
// 有参关键字指令
skipBlanks();
if (peek() == '(') {
para = scanPara(id);
idToken = new Token(symbol, beginRow);
paraToken = new ParaToken(para, beginRow);
return addIdParaToken(idToken, paraToken);
}
// throw new ParseException("#" + id + " directive requires parentheses \"()\"", new Location(fileName, beginRow));
return fail(); // 2020-02-28: 关键字指令在没有左括号的情况下也当作普通文本。支持更多应用场景,例如:jquery id 选择器用法 $("#if")
case 11: // 用户自定义指令必须有参数
skipBlanks();
if (peek() == '(') {
para = scanPara(id);
idToken = new Token(Symbol.ID, id, beginRow);
paraToken = new ParaToken(para, beginRow);
return addIdParaToken(idToken, paraToken);
}
return fail(); // 用户自定义指令在没有左括号的情况下当作普通文本
case 12: // 处理 "# define id (para)" 指令
skipBlanks();
if (CharTable.isLetter(peek())) {
id = scanId(); // 模板函数名称
skipBlanks();
if (peek() == '(') {
para = scanPara("define " + id);
idToken = new Token(Symbol.DEFINE, id, beginRow);
paraToken = new ParaToken(para, beginRow);
return addIdParaToken(idToken, paraToken);
}
throw new ParseException("#define " + id + " : template function definition requires parentheses \"()\"", new Location(fileName, beginRow));
}
throw new ParseException("#define directive requires identifier as a function name", new Location(fileName, beginRow));
case 20: // # @ id
id = scanId();
skipBlanks();
boolean hasQuestionMark = peek() == '?';
if (hasQuestionMark) {
next();
skipBlanks();
}
if (peek() == '(') {
para = scanPara(hasQuestionMark ? "@" + id + "?" : "@" + id);
idToken = new Token(hasQuestionMark ? Symbol.CALL_IF_DEFINED : Symbol.CALL, id, beginRow);
paraToken = new ParaToken(para, beginRow);
return addIdParaToken(idToken, paraToken);
}
return fail();
default :
return fail();
}
}
}
boolean foundFollowingIf() {
int p = forward;
while (CharTable.isBlank(buf[p])) {p++;}
if (buf[p++] == 'i') {
if (buf[p++] == 'f') {
while (CharTable.isBlank(buf[p])) {p++;}
// 要求出现 '(' 才认定解析成功,为了支持这种场景: #else if you ...
if (buf[p] == '(') {
forward = p;
return true;
}
}
}
return false;
}
/**
* 调用者已确定以字母或下划线开头,故一定可以获取到 id值
*/
String scanId() {
int idStart = forward;
while (CharTable.isLetterOrDigit(next())) {
;
}
return subBuf(idStart, forward - 1).toString();
}
/**
* 扫描指令参数,成功则返回,否则抛出词法分析异常
*/
StringBuilder scanPara(String id) {
char quotes = '"';
int localState = 0;
int parenDepth = 1; // 指令后面参数的第一个 '(' 深度为 1
next();
int paraStart = forward;
while (true) {
switch (localState) {
case 0:
for (char c=peek(); true; c=next()) {
if (c == ')') {
parenDepth--;
if (parenDepth == 0) { // parenDepth 不可能小于0,因为初始值为 1
next();
return subBuf(paraStart, forward - 2);
}
continue ;
}
if (c == '(') {
parenDepth++;
continue ;
}
if (c == '"' || c == '\'') {
quotes = c;
localState = 1;
break ;
}
if (CharTable.isExprChar(c)) {
continue ;
}
if (c == EOF) {
throw new ParseException("#" + id + " parameter can not match the end char ')'", new Location(fileName, beginRow));
}
throw new ParseException("#" + id + " parameter exists illegal char: '" + c + "'", new Location(fileName, beginRow));
}
break ;
case 1:
for (char c=next(); true; c=next()) {
if (c == quotes) {
if (buf[forward - 1] != '\\') { // 前一个字符不是转义字符
next();
localState = 0;
break ;
} else {
continue ;
}
}
if (c == EOF) {
throw new ParseException("#" + id + " parameter error, the string parameter not ending", new Location(fileName, beginRow));
}
}
break ;
}
}
}
/**
* 单行注释,开始状态 100,关注换行与 EOF
*/
boolean scanSingleLineComment() {
while (true) {
switch (state) {
case 100:
if (peek() == '#' && next() == '#' && next() == '#') {
state = 101;
continue ;
}
return fail();
case 101:
for (char c=next(); true; c=next()) {
if (c == '\n') {
if (deletePreviousTextTokenBlankTails()) {
return prepareNextScan(1);
} else {
return prepareNextScan(0);
}
}
if (c == EOF) {
deletePreviousTextTokenBlankTails();
return prepareNextScan(0);
}
}
default :
return fail();
}
}
}
/**
* 多行注释,开始状态 200,关注结尾标记与 EOF
*/
boolean scanMultiLineComment() {
while (true) {
switch (state) {
case 200:
if (peek() == '#' && next() == '-' && next() == '-') {
state = 201;
continue ;
}
return fail();
case 201:
for (char c=next(); true; c=next()) {
if (c == '-' && buf[forward + 1] == '-' && buf[forward + 2] == '#') {
forward = forward + 3;
if (lookForwardLineFeedAndEof() && deletePreviousTextTokenBlankTails()) {
return prepareNextScan(peek() != EOF ? 1 : 0);
} else {
return prepareNextScan(0);
}
}
if (c == EOF) {
throw new ParseException("The multiline comment start block \"#--\" can not match the end block: \"--#\"", new Location(fileName, beginRow));
}
}
default :
return fail();
}
}
}
/**
* 非解析块,开始状态 300,关注结尾标记与 EOF
*/
boolean scanNoParse() {
while (true) {
switch (state) {
case 300:
if (peek() == '#' && next() == '[' && next() == '[') {
state = 301;
continue ;
}
return fail();
case 301:
for (char c=next(); true; c=next()) {
if (c == ']' && buf[forward + 1] == ']' && buf[forward + 2] == '#') {
addTextToken(subBuf(getNoParseStart(), forward - 1)); // NoParse 块使用 TextToken
// return prepareNextScan(3);
forward = forward + 3;
if (lookForwardLineFeedAndEof() && deletePreviousTextTokenBlankTails()) {
return prepareNextScan(peek() != EOF ? 1 : 0);
} else {
return prepareNextScan(0);
}
}
if (c == EOF) {
throw new ParseException("The \"no parse\" start block \"#[[\" can not match the end block: \"]]#\"", new Location(fileName, beginRow));
}
}
default :
return fail();
}
}
}
// 非解析块头部 #[[ 处在独立一行时,要删除行尾的换行字符
int getNoParseStart() {
int fp = lexemeBegin + 3;
for (char c=buf[fp]; true; c=buf[++fp]) {
if (CharTable.isBlank(c)) {
continue ;
}
// #[[ 处在独立一行
if (c == '\n' && deletePreviousTextTokenBlankTails()) {
return fp + 1;
} else {
return lexemeBegin + 3;
}
}
}
boolean scanText() {
for (char c=peek(); true; c=next()) {
if (c == '#' || c == EOF) {
addTextToken(subBuf(lexemeBegin, forward - 1));
return prepareNextScan(0);
}
}
}
boolean fail() {
if (state < 300) {
forward = lexemeBegin;
forwardRow = beginRow;
}
if (state < 100) {
state = 100;
} else if (state < 200) {
state = 200;
} else if (state < 300) {
state = 300;
} else {
state = TEXT_STATE_DIAGRAM;
}
return false;
}
char next() {
if (buf[forward] == '\n') {
forwardRow++;
}
return buf[++forward];
}
char peek() {
return buf[forward];
}
void skipBlanks() {
while (CharTable.isBlank(buf[forward])) {
next();
}
}
/**
* scanPara 与 scanNoParse 存在 start > end 的情况
*/
StringBuilder subBuf(int start, int end) {
if (start > end) {
return null;
}
StringBuilder ret = new StringBuilder(end - start + 1);
for (int i=start; i<=end; i++) {
ret.append(buf[i]);
}
return ret;
}
boolean prepareNextScan(int moveForward) {
for (int i=0; i
© 2015 - 2024 Weber Informatics LLC | Privacy Policy