
lt.compiler.BraceScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of latte-compiler Show documentation
Show all versions of latte-compiler Show documentation
The latte-lang compiler project, which contains compiler and runtime required library.
The newest version!
/*
* The MIT License (MIT)
*
* Copyright (c) 2016 KuiGang Wang
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package lt.compiler;
import lt.compiler.lexical.*;
import lt.compiler.syntactic.UnknownTokenException;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
/**
* Scans text whose structure is formed by braces.
*/
public class BraceScanner extends AbstractScanner {
/**
 * Creates a brace scanner for the given input and rebuilds its token tables.
 *
 * @param fileName   name of the input text file
 * @param reader     reader supplying the text
 * @param properties properties for the scanner
 * @param err        error manager
 */
public BraceScanner(String fileName,
                    Reader reader,
                    Properties properties,
                    ErrorManager err) {
    super(fileName, reader, properties, err);
    // the token tables have to be adjusted once the superclass is set up
    init();
}
/**
 * Rebuilds the token tables for brace based scanning.
 *
 * Every token of {@link #LAYER} is moved into {@link #SPLIT_X} and
 * {@link #LAYER} is emptied. {@link #SPLIT} is then refilled: the
 * {@link #STRING} tokens come first, followed by all other known tokens
 * ordered from the longest to the shortest.
 */
private void init() {
    SPLIT_X.addAll(LAYER);
    LAYER.clear();

    // LAYER is empty at this point, so only the remaining tables contribute
    Set<String> tokens = new HashSet<String>();
    tokens.addAll(SPLIT_X);
    tokens.addAll(ENDING);
    tokens.add(COMMENT);
    tokens.add(MultipleLineCommentStart);
    tokens.add(MultipleLineCommentEnd);
    tokens.addAll(PAIR.keySet());
    tokens.addAll(PAIR.values());

    SPLIT.clear();
    // the longest string is considered first
    List<String> ordered = new ArrayList<String>(tokens);
    Collections.sort(ordered, new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            // lengths are non-negative ints, so the subtraction cannot overflow
            return b.length() - a.length();
        }
    });
    SPLIT.addAll(ordered);
    // string delimiters take precedence over every other token
    SPLIT.addAll(0, STRING);
}
/**
 * Runs the inherited scan and then applies {@link #finalCheck(ElementStartNode)}
 * to the resulting root node.
 *
 * @return the root node returned by the inherited scan
 * @throws IOException     raised while reading the input
 * @throws SyntaxException raised by the inherited scan or by the final check
 */
@Override
public ElementStartNode scan() throws IOException, SyntaxException {
    final ElementStartNode scannedRoot = super.scan();
    finalCheck(scannedRoot);
    return scannedRoot;
}
/**
 * Reads the input line by line and hands every line that still carries
 * content to {@link #scan(String, Args)}.
 *
 * For each line the line record is registered with the error manager, the
 * column is reset, an open multiple line comment is skipped up to its end
 * marker, lines starting with {@link #COMMENT} are dropped and the entries of
 * {@code args.defined} are substituted in the part of the line that precedes
 * the first {@link #COMMENT} occurrence. After a line has been scanned, a weak
 * {@link EndingNode} is appended when the last node is an {@link Element} and
 * no multiple line comment is open.
 *
 * @param args scanning context
 * @throws IOException     raised while reading the input
 * @throws SyntaxException raised while scanning a line
 */
@Override
protected void scan(Args args) throws IOException, SyntaxException {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        ++args.currentLine;
        err.putLineRecord(args.fileName, args.currentLine, line);
        args.currentCol = properties._COLUMN_BASE_;
        args.useDefine.clear();

        if (args.multipleLineComment) {
            int endIndex = line.indexOf(MultipleLineCommentEnd);
            if (endIndex == -1) {
                // the whole line still belongs to the comment
                continue;
            }
            int subCol = endIndex + MultipleLineCommentEnd.length();
            line = line.substring(subCol);
            args.currentCol += (subCol + 1);
            args.multipleLineComment = false;
        }

        // the line is nothing but comment
        if (line.trim().startsWith(COMMENT)) {
            continue;
        }

        // only the text in front of the comment takes part in define substitution
        int commentIndex = line.indexOf(COMMENT);
        if (commentIndex != -1) {
            String pre = substituteDefined(line.substring(0, commentIndex), args);
            line = pre + line.substring(commentIndex);
        } else {
            line = substituteDefined(line, args);
        }

        // get front spaces
        int spaces = 0;
        for (int i = 0; i < line.length(); ++i) {
            if (line.charAt(i) != ' ') {
                spaces = i;
                break;
            }
        }
        // the column is only still at its base when no comment end was skipped above
        if (args.currentCol == properties._COLUMN_BASE_) {
            args.currentCol += spaces + 1;
        }

        // remove front and end spaces
        line = line.trim();
        // check it's an empty line
        if (line.isEmpty()) {
            continue;
        }

        // start parsing
        scan(line, args);
        if (!args.multipleLineComment && args.previous instanceof Element) {
            args.previous = new EndingNode(args, EndingNode.WEAK);
        }
    }
}

/**
 * Replaces every key of {@code args.defined} found in the text with its value
 * and records each entry that changed the text in {@code args.useDefine}.
 *
 * @param text text to run the substitutions on
 * @param args scanning context holding the defined entries
 * @return the text after all substitutions
 */
private String substituteDefined(String text, Args args) {
    String result = text;
    for (Map.Entry<String, String> definedEntry : args.defined.entrySet()) {
        String replaced = result.replace(definedEntry.getKey(), definedEntry.getValue());
        if (!replaced.equals(result)) {
            args.useDefine.put(definedEntry.getKey(), definedEntry.getValue());
        }
        result = replaced;
    }
    return result;
}
/**
 * when the given line is not empty, do scanning.
 * <ol>
 * <li>check whether the line contains tokens in {@link #SPLIT}<br>
 * get the most front token, if several tokens are at the same position, then choose the one listed first in {@link #SPLIT}</li>
 * <li>if the token is not found, consider the whole line as one element and append it to the previous node</li>
 * <li>else
 * <ul>
 * <li>record text before the token as an element and do appending</li>
 * <li>check which category the token is in
 * <ul>
 * <li>{@link #LAYER}: after recording the token, a new ElementStartNode should be started, invoke {@link #createStartNode(Args, int)}</li>
 * <li>{@link #SPLIT_X}: the previous element and next element should be in the same layer. if it's also among {@link #NO_RECORD}, the token won't be recorded</li>
 * <li>{@link #STRING}: the next element is a string or a character. these characters should be considered as one element</li>
 * <li>{@link #ENDING}: append a new {@link EndingNode} to prevent generated nodes being ambiguous. NOTE that it only means the end of an expression or a statement. not the parsing process</li>
 * <li>{@link #COMMENT}: it's the start of a comment. the following chars would be ignored</li>
 * <li>{@link #PAIR}: for keys, it will start a new layer. for values, it will end the layer created by the key</li>
 * </ul>
 * </li>
 * </ul>
 * </li>
 * </ol>
 * here's an example of how the method works<br>
 * given the following input:
 * <pre>
 * val map={'name':'cass'}
 * </pre>
 * <ol>
 * <li>the most front and longest token is ' ', and ' ' is in {@link #NO_RECORD} :::
 * <pre>val/map={'name':'cass'}</pre></li>
 * <li>the most front and longest token is '=' :::
 * <pre>val/map/=/{'name':'cass'}</pre></li>
 * <li>the most front and longest token is '{', and '{' is a key of {@link #PAIR} :::
 * <pre>val/map/=/{/(LAYER-START/'name':'cass'})</pre></li>
 * <li>the most front and longest token is "'", and "'" is in {@link #STRING} :::
 * <pre>val/map/=/{/(LAYER-START/'name'/:'cass'})</pre></li>
 * <li>the most front and longest token is ':' :::
 * <pre>val/map/=/{/(LAYER-START/'name'/:/'cass'})</pre></li>
 * <li>the most front and longest token is "'", and "'" is in {@link #STRING} :::
 * <pre>val/map/=/{/(LAYER-START/'name'/:/'cass'/})</pre></li>
 * <li>the most front and longest token is "}", and '}' is a value of {@link #PAIR} :::
 * <pre>val/map/=/{/(LAYER-START/'name'/:/'cass')}</pre></li>
 * </ol>
 * the result is <code>val/map/=/{/(LAYER-START/'name'/:/'cass')}</code><br>
 * set a breakpoint in the method and focus on variable <code>line</code>, you will get exactly the same intermediate results
 * <p>
 * Error recovery: a string without an end token is reported and then assumed
 * to end at the line end, so the rest of the line is consumed as that string.
 * A pair end token without an open pair is reported and ignored.
 *
 * @param line line to parse
 * @param args args context
 * @throws SyntaxException syntax exceptions, including {@link SyntaxException}, {@link UnexpectedTokenException}
 */
private void scan(String line, Args args) throws SyntaxException {
    if (line.isEmpty()) return;

    // check multiple line comment
    if (args.multipleLineComment) {
        int endIndex = line.indexOf(MultipleLineCommentEnd);
        if (endIndex == -1) {
            // the comment does not end on this line
            return;
        }
        int subCol = endIndex + MultipleLineCommentEnd.length();
        args.currentCol += subCol;
        line = line.substring(subCol);
        args.multipleLineComment = false;
    }

    // check SPLIT
    // find the pattern at minimum location index; on equal positions the
    // token listed first in SPLIT wins because the comparison is strict
    int minIndex = line.length();
    String token = null; // recorded token
    for (String s : SPLIT) {
        int index = line.indexOf(s);
        if (index != -1 && index < minIndex) {
            minIndex = index;
            token = s;
        }
    }

    if (token == null) {
        if (!line.isEmpty()) {
            // not found, simply append whole input to previous
            TokenType type = getTokenType(line, args.generateLineCol());
            // a null type is skipped (presumably an unknown token - confirm in getTokenType)
            if (type != null) {
                args.previous = new Element(args, line, type);
                args.currentCol += line.length();
            }
        }
        return;
    }

    String copyOfLine = line;
    String str = line.substring(0, minIndex);
    if (!str.isEmpty()) {
        // record text before the token
        TokenType type = getTokenType(str, args.generateLineCol());
        if (type != null) {
            args.previous = new Element(args, str, type);
        }
        args.currentCol += str.length();
    }

    if (LAYER.contains(token)) {
        // start new layer
        args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
        createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4);
    } else if (SPLIT_X.contains(token)) {
        // do split check
        if (!NO_RECORD.contains(token)) {
            // record this token
            args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
        }
    } else if (STRING.contains(token)) {
        // string literal
        int lastIndex = minIndex;
        while (true) {
            int index = line.indexOf(token, lastIndex + token.length());
            // for "//" the end is moved to the last slash of a run of slashes;
            // this only makes sense when an end candidate was actually found
            if (index != -1 && token.equals("//")) {
                while (line.length() > index + 2) {
                    if (line.charAt(index + 2) == '/') {
                        ++index;
                    } else {
                        break;
                    }
                }
            }
            if (line.length() <= 1 || index == -1) {
                err.SyntaxException("end of string not found", args.generateLineCol());
                // assume that the end is line end
                err.debug("assume that the " + token + " end is line end");
                String rest = line.substring(minIndex);
                String generated = rest + token;
                args.previous = new Element(args, generated, getTokenType(generated, args.generateLineCol()));
                // the token length would be added in later steps
                args.currentCol += rest.length() - token.length();
                // everything up to the line end belongs to the string
                line = "";
                break;
            } else {
                String c = String.valueOf(line.charAt(index - 1));
                // check
                boolean isStringEnd = !ESCAPE.equals(c) || checkStringEnd(line, index - 1);
                if (isStringEnd) {
                    // the string starts at minIndex and ends at index
                    String s = line.substring(minIndex, index + token.length());
                    args.previous = new Element(args, s, getTokenType(s, args.generateLineCol()));
                    args.currentCol += (index - minIndex);
                    line = line.substring(index + token.length());
                    break;
                }
                lastIndex = index;
            }
        }
    } else if (ENDING.contains(token)) {
        // ending
        if (args.previous instanceof Element) {
            args.previous = new EndingNode(args, EndingNode.STRONG);
        }
    } else if (COMMENT.equals(token)) {
        // comment
        line = ""; // ignore all
    } else if (PAIR.containsKey(token)) {
        // pair start
        args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
        createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4);
        args.pairEntryStack.push(new PairEntry(token, args.startNodeStack.lastElement()));
    } else if (PAIR.containsValue(token)) {
        // pair end
        if (args.pairEntryStack.isEmpty()) {
            // no pair is open: report it instead of failing on the empty stack
            err.SyntaxException("unexpected " + token + ", no matching pair start found", args.generateLineCol());
            err.debug("ignore the unmatched " + token);
        } else {
            PairEntry entry = args.pairEntryStack.pop();
            String start = entry.key;
            if (!token.equals(PAIR.get(start))) {
                err.UnexpectedTokenException(PAIR.get(start), token, args.generateLineCol());
                // assume that the pair ends
                err.debug("assume that the pair ends");
            }
            ElementStartNode startNode = entry.startNode;
            if (startNode.hasNext()) {
                err.SyntaxException(
                        "indentation of " + startNode.next() + " should be " + startNode.getIndent(),
                        startNode.next().getLineCol());
                // fill the LinkedNode with all nodes after the startNode
                Node n = startNode.next();
                n.setPrevious(null);
                startNode.setNext(null);
                startNode.setLinkedNode(n);
            }
            if (args.startNodeStack.lastElement().getIndent().getIndent() >= startNode.getIndent().getIndent()) {
                redirectToPairStart(args, startNode.getIndent());
            } else {
                args.previous = startNode;
            }
            // the expected end token is recorded, even when a different one was found
            args.previous = new Element(args, PAIR.get(start), getTokenType(token, args.generateLineCol()));
        }
    } else if (token.equals(MultipleLineCommentStart)) {
        args.multipleLineComment = true;
    } else {
        err.UnknownTokenException(token, args.generateLineCol());
        // unknown token
        // simply ignore the token
    }

    // column
    args.currentCol += token.length();
    if (copyOfLine.equals(line)) {
        // line hasn't changed, do default modification
        line = line.substring(minIndex + token.length());
    }
    // recursively parse
    scan(line, args);
}
/**
 * Runs the inherited final check and afterwards cleans up the node list linked
 * to the root: a single start node that is the only linked node is replaced by
 * its own linked nodes, and every "{" or "}" element found while walking that
 * list is passed to {@link #removeBraces(ElementStartNode, Element)}.
 *
 * @param root root node of the scanned result
 * @throws UnknownTokenException raised by the inherited check
 */
@Override
protected void finalCheck(ElementStartNode root) throws UnknownTokenException {
    super.finalCheck(root);
    if (!root.hasLinkedNode()) {
        return;
    }

    Node head = root.getLinkedNode();
    // remove redundant start node
    if (!head.hasNext() && head instanceof ElementStartNode) {
        Node inner = ((ElementStartNode) head).getLinkedNode();
        root.setLinkedNode(inner);
        head = inner;
    }

    // remove braces
    for (Node cursor = head; cursor != null; cursor = cursor.next()) {
        if (!(cursor instanceof Element)) {
            continue;
        }
        Element element = (Element) cursor;
        if (element.getContent().equals("{") || element.getContent().equals("}")) {
            removeBraces(root, element);
        }
    }
}
/**
 * Unlinks a brace element from its neighbours.
 *
 * The predecessor, when there is one, is connected to the successor. Without
 * a predecessor the root is pointed at the successor, but only for "{"; a
 * leading "}" leaves the root link untouched. The successor, when there is
 * one, gets the element's predecessor as its new predecessor.
 *
 * @param root root node whose linked node may have to be moved
 * @param n    brace element to unlink
 */
private void removeBraces(ElementStartNode root, Element n) {
    if (!n.hasPrevious()) {
        if (n.getContent().equals("{")) {
            root.setLinkedNode(n.next());
        }
    } else {
        n.previous().setNext(n.next());
    }

    if (n.hasNext()) {
        n.next().setPrevious(n.previous());
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy