
// lt.compiler.IndentScanner Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of latte-compiler Show documentation
// Show all versions of latte-compiler Show documentation
// The latte-lang compiler project, which contains the compiler and the required runtime library.
// The newest version!
/*
* The MIT License (MIT)
*
* Copyright (c) 2016 KuiGang Wang
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package lt.compiler;
import lt.compiler.lexical.*;
import lt.compiler.syntactic.UnknownTokenException;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
/**
 * Transforms plain Latte text into tokens.
 * Latte uses indentation to distinguish program blocks.
 * Instead of emitting INDENT and DEDENT tokens, the scanner uses a tree of tokens to record the indentation info.
 * Consecutive statements with the same indentation are in the same layer.
 * <pre>
 * class User
 *     id : int
 *     name : String
 * </pre>
 * Here {@code (id : int)} and {@code (name : String)} are in the same layer.
 * The text would be considered as the following token tree:
 * <pre>
 * [class]-[User]-[|]-[END]
 *                 |
 *                 --[id]-[:]-[int]-[END]-[name]-[:]-[String]
 * </pre>
 *
 * @see Node
 * @see Element
 * @see ElementStartNode
 * @see EndingNode
 */
public class IndentScanner extends AbstractScanner {
/**
 * Creates an indentation based scanner. All arguments are handed over
 * unchanged to the {@link AbstractScanner} constructor.
 *
 * @param fileName   name of the input text file
 * @param reader     reader that supplies the text to scan
 * @param properties properties for the Scanner
 * @param err        error manager
 */
public IndentScanner(String fileName, Reader reader, Properties properties, ErrorManager err) {
    super(fileName, reader, properties, err);
}
/**
 * Scans the whole input, one line at a time.
 * <p>For every line read from the {@link #reader} the method
 * <ol>
 * <li>registers the line with the error manager and resets the column and the used-define map</li>
 * <li>skips text that still belongs to a multiple line comment, and lines that consist of a comment only</li>
 * <li>applies the replacements stored in {@code args.defined} to the text in front of the first comment marker</li>
 * <li>counts the leading spaces; the first line that reaches this step (blank lines included)
 * defines the root indent, the indentation of later lines is relative to it</li>
 * <li>checks the layer: if indent &gt; last layer indent a new layer is started,
 * if indent &lt; last layer indent the scanner goes back to an upper layer
 * (unless the line starts with the closing symbol of the currently open pair)</li>
 * <li>invokes {@link #scan(String, Args)} to scan the trimmed line</li>
 * <li>appends a weak {@link EndingNode} if the last produced node is an {@link Element}</li>
 * </ol>
 *
 * @param args args context
 * @throws IOException     exceptions during reading chars from reader
 * @throws SyntaxException syntax exceptions, including {@link SyntaxException}, {@link UnexpectedTokenException}, {@link IllegalIndentationException}
 * @see #scan(String, Args)
 */
@Override
protected void scan(Args args) throws IOException, SyntaxException {
    String line = reader.readLine();
    int rootIndent = -1;
    while (line != null) {
        ++args.currentLine;
        err.putLineRecord(args.fileName, args.currentLine, line);
        args.currentCol = properties._COLUMN_BASE_;
        args.useDefine.clear();

        if (args.multipleLineComment) {
            if (!line.contains(MultipleLineCommentEnd)) {
                // still inside the comment, drop the whole line
                line = reader.readLine();
                continue;
            } else {
                // the comment ends on this line, keep only the text behind the end marker
                int subCol = line.indexOf(MultipleLineCommentEnd) + MultipleLineCommentEnd.length();
                line = line.substring(subCol);
                args.currentCol += (subCol + 1);
                args.multipleLineComment = false;
            }
        }

        // the line is nothing but comment
        if (line.trim().startsWith(COMMENT)) {
            line = reader.readLine();
            continue;
        }

        // apply the defines to the code part only, text behind the comment marker is left untouched
        int commentIndex = line.indexOf(COMMENT);
        String code = commentIndex == -1 ? line : line.substring(0, commentIndex);
        String comment = commentIndex == -1 ? "" : line.substring(commentIndex);
        for (Map.Entry<String, String> definedEntry : args.defined.entrySet()) {
            String replaced = code.replace(definedEntry.getKey(), definedEntry.getValue());
            if (!replaced.equals(code)) {
                // remember which defines were actually used on this line
                args.useDefine.put(definedEntry.getKey(), definedEntry.getValue());
            }
            code = replaced;
        }
        line = code + comment;

        // get front spaces (stays 0 when the line contains nothing but spaces)
        int spaces = 0;
        for (int i = 0; i < line.length(); ++i) {
            if (line.charAt(i) != ' ') {
                spaces = i;
                break;
            }
        }

        // set root indent
        if (rootIndent == -1) {
            rootIndent = spaces;
            spaces = 0;
        } else {
            spaces -= rootIndent;
        }

        // the column is only adjusted if it was not already moved by the multiple line comment handling
        if (args.currentCol == properties._COLUMN_BASE_) {
            args.currentCol += spaces + 1 + rootIndent;
        }

        int indentation = spaces;

        // remove spaces
        line = line.trim();

        // check it's an empty line
        if (line.isEmpty()) {
            line = reader.readLine();
            continue;
        }

        // check start node
        ElementStartNode lastStartNode = args.startNodeStack.lastElement();
        Indent lastIndentElem = lastStartNode.getIndent();
        int lastNonFlexIndent = args.getLastNonFlexIndent();

        if (indentation > lastNonFlexIndent && lastIndentElem.getIndent() == Indent.FLEX) {
            // flex
            ElementStartNode parent = args.startNodeStack.elementAt(args.startNodeStack.size() - 2);
            Indent parentIndent = parent.getIndent();
            if (indentation > parentIndent.getIndent()) {
                // greater indent, assign it to the flex
                lastIndentElem.setIndent(indentation);
            } else {
                // smaller or equal
                // do redirect
                redirectToDeeperStartNodeByIndent(args, indentation, true);
            }
        } else {
            if (lastIndentElem.getIndent() != indentation) {
                if (indentation <= lastNonFlexIndent) {
                    // smaller indent
                    // check PAIR_END and handle
                    boolean isPairEnd = false;
                    for (String pairEnd : PAIR.values()) {
                        if (line.startsWith(pairEnd)) {
                            if (args.pairEntryStack.isEmpty()) {
                                // a closing symbol without any open pair:
                                // treat the line as an ordinary dedent here, the token itself
                                // is reported as unexpected when the line is scanned
                                break;
                            }
                            isPairEnd = true;

                            PairEntry lastPair = args.pairEntryStack.lastElement();
                            // check indentation
                            ElementStartNode theStartNode = lastPair.startNode;
                            // go to it's parent
                            for (int i = args.startNodeStack.size() - 1; i >= 0; --i) {
                                ElementStartNode node = args.startNodeStack.get(i);
                                if (node == theStartNode) {
                                    assert i != 0;
                                    theStartNode = args.startNodeStack.get(i - 1);
                                    break;
                                }
                            }
                            Indent indent = theStartNode.getIndent();
                            if (indent.getIndent() != Indent.FLEX) {
                                // current should greater or equal
                                // else, raise compile error
                                if (indentation < indent.getIndent()) {
                                    err.IllegalIndentationException(indent.getIndent(), args.generateLineCol());
                                    // error handling: ignore and assume it's correct
                                }
                            }

                            if (!PAIR.get(lastPair.key).equals(pairEnd)) {
                                // last pair mismatch
                                // set `isPairEnd` to false
                                // and throw compile error
                                // in later steps
                                isPairEnd = false;
                            }
                            break;
                        }
                    }
                    // if is PAIR_END, handle the redirect in later steps
                    // if not PAIR_END, redirect the startNode
                    if (!isPairEnd) {
                        redirectToDeeperStartNodeByIndent(args, indentation, true);
                    }
                } else {
                    // greater indent
                    createStartNode(args, indentation);
                }
            }
        }

        // start parsing
        scan(line, args);

        // close the statement unless the line left a multiple line comment open
        if (!args.multipleLineComment) {
            if (args.previous instanceof Element) {
                args.previous = new EndingNode(args, EndingNode.WEAK);
            }
        }

        line = reader.readLine();
    }
}
/**
 * Scans one (already trimmed) line. An empty line is ignored.
 * <ol>
 * <li>if a multiple line comment is still open, everything up to and including its end marker is dropped
 * (the whole line if the end marker is missing)</li>
 * <li>the token of {@link #SPLIT} with the smallest index in the line is chosen; when several tokens
 * start at the same index the one that is iterated first wins
 * (NOTE(review): presumably {@link #SPLIT} is ordered so that longer tokens come first - confirm)</li>
 * <li>if no token is found, the whole line is recorded as one element</li>
 * <li>otherwise the text before the token is recorded as an element, and the token is handled by category:
 * <ul>
 * <li>{@link #LAYER}: the token is recorded and a new ElementStartNode is started, see {@link #createStartNode(Args, int)}</li>
 * <li>{@link #SPLIT_X}: the previous and the next element stay in the same layer. If the token is also in {@link #NO_RECORD} it is not recorded</li>
 * <li>{@link #STRING}: the text up to the matching closing token is recorded as one element</li>
 * <li>{@link #ENDING}: a strong {@link EndingNode} is appended. It only marks the end of an expression or statement, not the end of parsing</li>
 * <li>{@link #COMMENT}: start of a comment, the rest of the line is ignored</li>
 * <li>{@link #PAIR}: a key starts a new layer, a value ends the layer created by the key</li>
 * </ul>
 * </li>
 * <li>the remaining text of the line is scanned recursively</li>
 * </ol>
 * Example, given the input
 * <pre>
 * val map={'name':'cass'}
 * </pre>
 * the line is processed step by step:
 * <pre>
 * ' ' (in NO_RECORD)    ::: val/map={'name':'cass'}
 * '='                   ::: val/map/=/{'name':'cass'}
 * '{' (key of PAIR)     ::: val/map/=/{/(LAYER-START/'name':'cass'})
 * "'" (in STRING)       ::: val/map/=/{/(LAYER-START/'name'/:'cass'})
 * ':'                   ::: val/map/=/{/(LAYER-START/'name'/:/'cass'})
 * "'" (in STRING)       ::: val/map/=/{/(LAYER-START/'name'/:/'cass'/})
 * '}' (value of PAIR)   ::: val/map/=/{/(LAYER-START/'name'/:/'cass')}
 * </pre>
 * The result is {@code val/map/=/{/(LAYER-START/'name'/:/'cass')}}.
 *
 * @param line line to parse
 * @param args args context
 * @throws SyntaxException syntax exceptions, including {@link SyntaxException}, {@link UnexpectedTokenException}
 */
private void scan(String line, Args args) throws SyntaxException {
    if (line.isEmpty()) return;

    // check multiple line comment
    if (args.multipleLineComment) {
        int commentEnd = line.indexOf(MultipleLineCommentEnd);
        if (commentEnd == -1) {
            // the comment does not end on this line
            return;
        }
        int subCol = commentEnd + MultipleLineCommentEnd.length();
        args.currentCol += subCol;
        line = line.substring(subCol);
        args.multipleLineComment = false;
    }

    // check SPLIT
    // find the token with the smallest index, the first one iterated wins on ties
    int minIndex = line.length();
    String token = null; // recorded token
    for (String s : SPLIT) {
        int index = line.indexOf(s);
        if (index != -1 && index < minIndex) {
            minIndex = index;
            token = s;
        }
    }

    if (token == null) {
        if (!line.isEmpty()) {
            // not found, simply append whole input to previous
            TokenType type = getTokenType(line, args.generateLineCol());
            // a null type means the text is an unknown token, which is ignored
            if (type != null) {
                args.previous = new Element(args, line, type);
                args.currentCol += line.length();
            }
        }
    } else {
        String copyOfLine = line;
        String str = line.substring(0, minIndex);
        if (!str.isEmpty()) {
            // record text before the token
            TokenType type = getTokenType(str, args.generateLineCol());
            if (type != null) {
                args.previous = new Element(args, str, type);
            }
            args.currentCol += str.length();
        }

        if (LAYER.contains(token)) {
            // start new layer
            args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
            createStartNode(args, Indent.FLEX);
        } else if (SPLIT_X.contains(token)) {
            // do split check
            if (!NO_RECORD.contains(token)) {
                // record this token
                args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
            }
        } else if (STRING.contains(token)) {
            // string literal
            int lastIndex = minIndex;
            while (true) {
                int index = line.indexOf(token, lastIndex + token.length());
                if (index != -1 && token.equals("//")) {
                    // the closing `//` may be preceded by slashes that belong to the content,
                    // move the end to the last slash of the run.
                    // only done when a closing token exists, otherwise -1 would be turned into a bogus index
                    while (line.length() > index + 2) {
                        if (line.charAt(index + 2) == '/') {
                            ++index;
                        } else {
                            break;
                        }
                    }
                }
                if (line.length() <= 1 || index == -1) {
                    err.SyntaxException("end of string not found", args.generateLineCol());
                    // assume that the end is line end
                    err.debug("assume that the " + token + " end is line end");
                    String generated = line.substring(minIndex) + token;
                    args.previous = new Element(args, generated, getTokenType(generated, args.generateLineCol()));
                    // everything from the opening token to the line end is consumed by the literal;
                    // token.length() is added in later steps
                    args.currentCol += (line.length() - minIndex) - token.length();
                    line = "";
                    break;
                } else {
                    String c = String.valueOf(line.charAt(index - 1));
                    // an escaped token does not end the string unless checkStringEnd accepts it
                    boolean isStringEnd = !ESCAPE.equals(c) || checkStringEnd(line, index - 1);
                    if (isStringEnd) {
                        // the string starts at minIndex and ends at index
                        String s = line.substring(minIndex, index + token.length());
                        args.previous = new Element(args, s, getTokenType(s, args.generateLineCol()));
                        args.currentCol += (index - minIndex); // the token length is added in later steps
                        line = line.substring(index + token.length());
                        break;
                    }
                    lastIndex = index;
                }
            }
        } else if (ENDING.contains(token)) {
            // ending
            if (args.previous instanceof Element) {
                args.previous = new EndingNode(args, EndingNode.STRONG);
            }
        } else if (COMMENT.equals(token)) {
            // comment
            line = ""; // ignore all
        } else if (PAIR.containsKey(token)) {
            // pair start
            args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
            createStartNode(args, Indent.FLEX);
            args.pairEntryStack.push(new PairEntry(token, args.startNodeStack.lastElement()));
        } else if (PAIR.containsValue(token)) {
            // pair end
            if (args.pairEntryStack.isEmpty()) {
                // nothing to close, the rest of the line is not scanned
                err.UnexpectedTokenException(token, args.generateLineCol());
                return;
            }
            PairEntry entry = args.pairEntryStack.pop();
            String start = entry.key;
            if (!token.equals(PAIR.get(start))) {
                err.UnexpectedTokenException(PAIR.get(start), token, args.generateLineCol());
                // assume that the pair ends
                err.debug("assume that the pair ends");
            }

            ElementStartNode pairStartNode = entry.startNode;
            if (pairStartNode.hasNext()) {
                if (pairStartNode.next() instanceof EndingNode && !pairStartNode.next().hasNext()) {
                    // a lone trailing ending node behind the start node is dropped
                    pairStartNode.setNext(null);
                } else {
                    err.SyntaxException(
                        "indentation of " + pairStartNode.next() + " should be " + pairStartNode.getIndent(),
                        pairStartNode.next().getLineCol());
                    // fill the LinkedNode with all nodes after the pairStartNode
                    Node n = pairStartNode.next();
                    n.setPrevious(null);
                    pairStartNode.setNext(null);
                    pairStartNode.setLinkedNode(n);
                }
            }

            ElementStartNode lastElement = args.startNodeStack.lastElement();
            Indent lastIndentElem = lastElement.getIndent();
            int lastIndent = lastIndentElem.getIndent();
            int pairIndent = pairStartNode.getIndent().getIndent();
            if (lastIndent >= pairIndent) {
                redirectToPairStart(args, pairStartNode.getIndent());
            } else {
                args.previous = pairStartNode;
            }
            // the recorded content is the closing symbol that matches the opening one
            args.previous = new Element(args, PAIR.get(start), getTokenType(token, args.generateLineCol()));
        } else if (token.equals(MultipleLineCommentStart)) {
            if (!args.multipleLineComment) {
                args.multipleLineComment = true;
            }
        } else {
            err.UnknownTokenException(token, args.generateLineCol());
            // unknown token
            // simply ignore the token
        }

        // column
        args.currentCol += token.length();

        if (copyOfLine.equals(line)) {
            // line hasn't changed, do default modification
            line = line.substring(minIndex + token.length());
        }
        // recursively parse
        scan(line, args);
    }
}
/**
 * Runs the inherited final check and then post-processes the nodes linked directly to the root:
 * <ul>
 * <li>if the first linked node is an {@link ElementStartNode} without a successor,
 * it is replaced by its own linked node</li>
 * <li>for every <code>{</code> element that is not directly followed by a <code>}</code> element,
 * the brace itself and the next <code>}</code> element found in the same chain are unlinked
 * via {@link #removeLayerControlSymbols(ElementStartNode, Element)}</li>
 * </ul>
 *
 * @param root root node of the token tree
 * @throws UnknownTokenException declared by the overridden method
 */
@Override
protected void finalCheck(ElementStartNode root) throws UnknownTokenException {
    super.finalCheck(root);
    if (!root.hasLinkedNode()) {
        return;
    }

    Node n = root.getLinkedNode();
    // remove redundant start node
    if (!n.hasNext() && n instanceof ElementStartNode) {
        Node newN = ((ElementStartNode) n).getLinkedNode();
        root.setLinkedNode(newN);
        n = newN;
    }

    while (n != null) {
        if (n instanceof Element && ((Element) n).getContent().equals("{")) {
            Node afterBraceStart = n.next();
            boolean emptyBraces = afterBraceStart instanceof Element
                && ((Element) afterBraceStart).getContent().equals("}");
            if (!emptyBraces) {
                // non-empty braces: unlink the `{` ...
                assert afterBraceStart instanceof ElementStartNode;
                removeLayerControlSymbols(root, (Element) n);
                // ... and look for the closing `}` in the same chain
                n = n.next();
                while (n != null
                    && !(n instanceof Element && ((Element) n).getContent().equals("}"))) {
                    n = n.next();
                }
                if (n == null) {
                    // the chain ended without a closing brace, nothing more to process
                    break;
                }
                removeLayerControlSymbols(root, (Element) n);
            }
        }
        n = n.next();
    }
}
/**
 * Unlinks the given element from the chain it belongs to.
 * <ul>
 * <li>no previous node: if the element is <code>{</code>, the root's linked node becomes the element's successor</li>
 * <li>the element is the last node and directly follows an {@link EndingNode} that itself has a predecessor:
 * that ending node is cut off together with the element</li>
 * <li>otherwise the previous node is connected to the element's successor</li>
 * </ul>
 * Finally the successor (if any) gets the element's previous node as its new predecessor.
 *
 * @param root root node, only modified when the element is the head of the chain
 * @param n    the element to unlink
 */
private void removeLayerControlSymbols(ElementStartNode root, Element n) {
    if (!n.hasPrevious()) {
        if (n.getContent().equals("{")) {
            // the brace is the head of the chain
            root.setLinkedNode(n.next());
        }
    } else {
        boolean lastNodeBehindEnding = n.previous().previous() != null
            && n.previous() instanceof EndingNode
            && n.next() == null;
        if (lastNodeBehindEnding) {
            // remove the previous ending node
            n.previous().previous().setNext(null);
        } else {
            n.previous().setNext(n.next());
        }
    }

    if (n.hasNext()) {
        n.next().setPrevious(n.previous());
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy