// lt.compiler.BraceScanner Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of latte-compiler Show documentation
// The latte-lang compiler project, which contains the compiler and the runtime library it requires.
// The newest version!
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 KuiGang Wang
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package lt.compiler;

import lt.compiler.lexical.*;
import lt.compiler.syntactic.UnknownTokenException;

import java.io.IOException;
import java.io.Reader;
import java.util.*;

/**
 * Scans text whose structure is formed by braces.
 */
public class BraceScanner extends AbstractScanner {
        /**
         * Initiates the scanner with a reader, delegating all arguments to the superclass
         * constructor and then calling {@code init()} to re-categorise the token tables
         * before any scanning takes place.
         *
         * @param fileName   the input text file name
         * @param reader     text reader
         * @param properties properties for the Scanner
         * @param err        error manager
         */
        public BraceScanner(String fileName, Reader reader, Properties properties, ErrorManager err) {
                super(fileName, reader, properties, err);

                // moves the LAYER tokens into SPLIT_X and rebuilds SPLIT (see init())
                init();
        }

        /**
         * Re-categorises the token tables and rebuilds {@code SPLIT}.
         * <p>
         * Every {@code LAYER} token is copied into {@code SPLIT_X} and {@code LAYER} is then
         * emptied. {@code SPLIT} is rebuilt from {@code SPLIT_X}, {@code ENDING}, the comment
         * markers and both sides of {@code PAIR}, ordered longest first, with the
         * {@code STRING} tokens inserted in front of everything else.
         */
        private void init() {
                SPLIT_X.addAll(LAYER);
                LAYER.clear();

                // LAYER is empty from here on, so it contributes nothing to the split set
                Set<String> tokens = new HashSet<String>();
                tokens.addAll(SPLIT_X);
                tokens.addAll(ENDING);
                tokens.add(COMMENT);
                tokens.add(MultipleLineCommentStart);
                tokens.add(MultipleLineCommentEnd);
                tokens.addAll(PAIR.keySet());
                tokens.addAll(PAIR.values());

                SPLIT.clear();
                // the longest string is considered first
                List<String> ordered = new ArrayList<String>(tokens);
                Collections.sort(ordered, new Comparator<String>() {
                        @Override
                        public int compare(String a, String b) {
                                // lengths are never negative, so the subtraction cannot overflow
                                return b.length() - a.length();
                        }
                });
                SPLIT.addAll(ordered);
                // string delimiters take precedence over every other token
                SPLIT.addAll(0, STRING);
        }

        /**
         * Runs the inherited scan and then passes the resulting root node through
         * {@link #finalCheck(ElementStartNode)} before handing it back.
         *
         * @return the root node produced by the inherited scan
         * @throws IOException     propagated from the inherited scan
         * @throws SyntaxException propagated from the inherited scan or from the final check
         */
        @Override
        public ElementStartNode scan() throws IOException, SyntaxException {
                final ElementStartNode scannedRoot = super.scan();
                finalCheck(scannedRoot);
                return scannedRoot;
        }

        /**
         * Reads the input line by line and hands every line that still has content to
         * {@link #scan(String, Args)}.
         * <p>
         * For each line this method
         * <ul>
         * <li>increments the line counter, records the raw line in the error manager and
         * resets the column and the per-line {@code useDefine} map;</li>
         * <li>skips the line, or its leading part, while a multiple line comment is open;</li>
         * <li>skips lines that start with {@code COMMENT} after trimming;</li>
         * <li>applies the {@code args.defined} replacements to the text in front of the first
         * {@code COMMENT} occurrence (or to the whole line when there is none);</li>
         * <li>trims the line and, unless it is empty, scans it;</li>
         * <li>appends a weak {@link EndingNode} when the line ended on an {@link Element}
         * and no multiple line comment is left open.</li>
         * </ul>
         *
         * @param args args context
         * @throws IOException     if reading a line from the reader fails
         * @throws SyntaxException syntax exceptions raised while scanning a line
         */
        @Override
        protected void scan(Args args) throws IOException, SyntaxException {
                String rawLine;
                while ((rawLine = reader.readLine()) != null) {
                        String line = rawLine;
                        ++args.currentLine;

                        err.putLineRecord(args.fileName, args.currentLine, line);

                        args.currentCol = properties._COLUMN_BASE_;
                        args.useDefine.clear();

                        if (args.multipleLineComment) {
                                int endIndex = line.indexOf(MultipleLineCommentEnd);
                                if (endIndex == -1) {
                                        // still inside the comment, drop the whole line
                                        continue;
                                }
                                int subCol = endIndex + MultipleLineCommentEnd.length();
                                line = line.substring(subCol);
                                args.currentCol += (subCol + 1);
                                args.multipleLineComment = false;
                        }

                        // the line is nothing but comment
                        if (line.trim().startsWith(COMMENT)) {
                                continue;
                        }

                        // defines are only applied to the text in front of a comment marker
                        int commentIndex = line.indexOf(COMMENT);
                        if (commentIndex == -1) {
                                line = substituteDefines(line, args);
                        } else {
                                String pre = substituteDefines(line.substring(0, commentIndex), args);
                                line = pre + line.substring(commentIndex);
                        }

                        // get front spaces
                        int spaces = 0;
                        for (int i = 0; i < line.length(); ++i) {
                                if (line.charAt(i) != ' ') {
                                        spaces = i;
                                        break;
                                }
                        }

                        // the column is only shifted here when the comment handling above
                        // has not already moved it
                        if (args.currentCol == properties._COLUMN_BASE_) {
                                args.currentCol += spaces + 1;
                        }

                        // remove front and end spaces
                        line = line.trim();

                        // check it's an empty line
                        if (line.isEmpty()) {
                                continue;
                        }

                        // start parsing
                        scan(line, args);

                        if (!args.multipleLineComment && args.previous instanceof Element) {
                                args.previous = new EndingNode(args, EndingNode.WEAK);
                        }
                }
        }

        /**
         * Applies every entry of {@code args.defined} to the text, in the map's iteration
         * order, and records each entry that actually changed the text in {@code args.useDefine}.
         *
         * @param text text to run the replacements on
         * @param args args context holding the defined replacements
         * @return the text after all replacements
         */
        private String substituteDefines(String text, Args args) {
                String result = text;
                for (Map.Entry<String, String> definedEntry : args.defined.entrySet()) {
                        String replaced = result.replace(definedEntry.getKey(), definedEntry.getValue());
                        if (!replaced.equals(result)) {
                                args.useDefine.put(definedEntry.getKey(), definedEntry.getValue());
                        }
                        result = replaced;
                }
                return result;
        }

        /**
         * when the given line is not empty, do scanning.
         * <ol>
         * <li>check whether the line contains tokens in {@link #SPLIT}</li>
         * <li>get the most front token; if several tokens start at the same position, the one
         * listed first in {@link #SPLIT} is chosen</li>
         * <li>if the token is not found, consider the whole line as one element and append to previous node</li>
         * <li>else
         * <ol>
         * <li>record text before the token as an element and do appending</li>
         * <li>check which category the token is in
         * <ul>
         * <li>{@link #LAYER} means starts after recording the token, a new ElementStartNode should be started, invoke {@link #createStartNode(Args, int)}</li>
         * <li>{@link #SPLIT_X}: the previous element and next element should be in the same layer. if it's also among {@link #NO_RECORD}, the token won't be recorded</li>
         * <li>{@link #STRING}: the next element is a string or a character. these characters should be considered as one element</li>
         * <li>{@link #ENDING}: append a new {@link EndingNode} to prevent generated nodes being ambiguous. NOTE that it only means the end of an expression or a statement. not the parsing process</li>
         * <li>{@link #COMMENT}: it's the start of a comment. the following chars would be ignored</li>
         * <li>{@link #PAIR}: for keys, it will start a new layer. for values, it will end the layer created by the key</li>
         * </ul>
         * </li>
         * </ol>
         * </li>
         * </ol>
         * here's an example of how the method works<br>
         * given the following input:
         * <pre>
         * val map={'name':'cass'}
         * </pre>
         * <ol>
         * <li>the most front token is ' ', and ' ' is in {@link #NO_RECORD} ::: val/map={'name':'cass'}</li>
         * <li>the most front token is '=' ::: val/map/=/{'name':'cass'}</li>
         * <li>the most front token is '{', and '{' is a key of {@link #PAIR} ::: val/map/=/{/(LAYER-START/'name':'cass'})</li>
         * <li>the most front token is "'", and "'" is in {@link #STRING} ::: val/map/=/{/(LAYER-START/'name'/:'cass'})</li>
         * <li>the most front token is ':' ::: val/map/=/{/(LAYER-START/'name'/:/'cass'})</li>
         * <li>the most front token is "'", and "'" is in {@link #STRING} ::: val/map/=/{/(LAYER-START/'name'/:/'cass'/})</li>
         * <li>the most front token is "}", and '}' is a value of {@link #PAIR} ::: val/map/=/{/(LAYER-START/'name'/:/'cass')}</li>
         * </ol>
         * the result is val/map/=/{/(LAYER-START/'name'/:/'cass')}<br>
         * set a breakpoint in the method and focus on variable line, you will get exactly the same intermediate results
         * <p>
         * The method handles one token per invocation and calls itself with the rest of the line.
         * When a string literal is not terminated, the error is reported and the literal is
         * assumed to run to the end of the line. When a closing pair token appears while no
         * pair is open, the error is reported and the token is ignored.
         *
         * @param line line to parse
         * @param args args context
         * @throws SyntaxException syntax exceptions, including {@link SyntaxException}, {@link UnexpectedTokenException}
         */
        private void scan(String line, Args args) throws SyntaxException {
                if (line.isEmpty()) return;

                // check multiple line comment
                if (args.multipleLineComment) {
                        if (line.contains(MultipleLineCommentEnd)) {
                                int subCol = line.indexOf(MultipleLineCommentEnd) + MultipleLineCommentEnd.length();
                                args.currentCol += subCol;
                                line = line.substring(subCol);
                                args.multipleLineComment = false;
                        } else {
                                return;
                        }
                }

                // check SPLIT
                // find the pattern at minimum location index; on a tie the earlier SPLIT entry wins
                int minIndex = line.length();
                String token = null; // recorded token
                for (String s : SPLIT) {
                        int index = line.indexOf(s);
                        if (index != -1 && index < minIndex) {
                                minIndex = index;
                                token = s;
                        }
                }

                if (token == null) {
                        if (!line.isEmpty()) {
                                // not found, simply append whole input to previous
                                TokenType type = getTokenType(line, args.generateLineCol());
                                // a null type means unknown token, which is ignored
                                if (type != null) {
                                        args.previous = new Element(args, line, type);
                                        args.currentCol += line.length();
                                }
                        }
                } else {
                        String copyOfLine = line;
                        String str = line.substring(0, minIndex);
                        if (!str.isEmpty()) {
                                // record text before the token
                                TokenType type = getTokenType(str, args.generateLineCol());
                                if (type != null) {
                                        args.previous = new Element(args, str, type);
                                }
                                args.currentCol += str.length();
                        }

                        if (LAYER.contains(token)) {
                                // start new layer
                                args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
                                createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4);
                        } else if (SPLIT_X.contains(token)) {
                                // do split check
                                if (!NO_RECORD.contains(token)) {
                                        // record this token
                                        args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
                                }
                        } else if (STRING.contains(token)) {
                                // string literal
                                int lastIndex = minIndex;
                                while (true) {
                                        int index = line.indexOf(token, lastIndex + token.length());
                                        // only extend an end that was actually found; running this
                                        // with index == -1 would fabricate a bogus end position
                                        if (index != -1 && token.equals("//")) {
                                                while (line.length() > index + 2) {
                                                        if (line.charAt(index + 2) == '/') {
                                                                ++index;
                                                        } else {
                                                                break;
                                                        }
                                                }
                                        }
                                        if (line.length() <= 1 || index == -1) {
                                                err.SyntaxException("end of string not found", args.generateLineCol());
                                                // assume that the end is line end
                                                err.debug("assume that the " + token + " end is line end");

                                                String generated = line.substring(minIndex) + token;

                                                args.previous = new Element(args, generated, getTokenType(generated, args.generateLineCol()));
                                                // the literal takes everything up to the line end;
                                                // the token length would be added in later steps
                                                args.currentCol += (line.length() - minIndex) - token.length();
                                                // nothing is left to scan, the content must not be tokenised again
                                                line = "";

                                                break;
                                        } else {
                                                String c = String.valueOf(line.charAt(index - 1));
                                                // check
                                                boolean isStringEnd = !ESCAPE.equals(c) || checkStringEnd(line, index - 1);

                                                if (isStringEnd) {
                                                        // the string starts at minIndex and ends at index
                                                        String s = line.substring(minIndex, index + token.length());

                                                        args.previous = new Element(args, s, getTokenType(s, args.generateLineCol()));
                                                        args.currentCol += (index - minIndex);
                                                        line = line.substring(index + token.length());
                                                        break;
                                                }

                                                // escaped delimiter, keep looking behind it
                                                lastIndex = index;
                                        }
                                }
                        } else if (ENDING.contains(token)) {
                                // ending
                                if (args.previous instanceof Element) {
                                        args.previous = new EndingNode(args, EndingNode.STRONG);
                                }
                        } else if (COMMENT.equals(token)) {
                                // comment
                                line = ""; // ignore all
                        } else if (PAIR.containsKey(token)) {
                                // pair start
                                args.previous = new Element(args, token, getTokenType(token, args.generateLineCol()));
                                createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4);
                                args.pairEntryStack.push(new PairEntry(token, args.startNodeStack.lastElement()));
                        } else if (PAIR.containsValue(token)) {
                                // pair end
                                if (args.pairEntryStack.isEmpty()) {
                                        // a closing token without an open pair: report it instead of
                                        // failing with an exception from pop() on the empty stack
                                        err.SyntaxException("unexpected " + token + ", no pair start to close", args.generateLineCol());
                                        // assume that the token is redundant
                                        err.debug("ignore the " + token);
                                } else {
                                        PairEntry entry = args.pairEntryStack.pop();
                                        String start = entry.key;
                                        if (!token.equals(PAIR.get(start))) {
                                                err.UnexpectedTokenException(PAIR.get(start), token, args.generateLineCol());
                                                // assume that the pair ends
                                                err.debug("assume that the pair ends");
                                        }

                                        ElementStartNode startNode = entry.startNode;
                                        if (startNode.hasNext()) {
                                                err.SyntaxException(
                                                        "indentation of " + startNode.next() + " should be " + startNode.getIndent(),
                                                        startNode.next().getLineCol());
                                                // fill the LinkedNode with all nodes after the startNode
                                                Node n = startNode.next();
                                                n.setPrevious(null);
                                                startNode.setNext(null);
                                                startNode.setLinkedNode(n);
                                        }

                                        if (args.startNodeStack.lastElement().getIndent().getIndent() >= startNode.getIndent().getIndent()) {
                                                redirectToPairStart(args, startNode.getIndent());
                                        } else {
                                                args.previous = startNode;
                                        }
                                        // the expected closing token is recorded, even when a different one was read
                                        args.previous = new Element(args, PAIR.get(start), getTokenType(token, args.generateLineCol()));
                                }
                        } else if (token.equals(MultipleLineCommentStart)) {
                                if (!args.multipleLineComment) {
                                        args.multipleLineComment = true;
                                }
                        } else {
                                err.UnknownTokenException(token, args.generateLineCol());
                                // unknown token
                                // simply ignore the token
                        }

                        // column
                        args.currentCol += token.length();
                        if (copyOfLine.equals(line)) {
                                // line hasn't changed, do default modification
                                line = line.substring(minIndex + token.length());
                        }
                        // recursively parse
                        scan(line, args);
                }
        }

        /**
         * Runs the inherited final check and then cleans up the node chain linked to the root:
         * a lone {@link ElementStartNode} at the head is replaced by its own linked node, and
         * every {@link Element} in that chain whose content is <code>{</code> or <code>}</code>
         * is passed to {@code removeBraces}.
         *
         * @param root the root node to check
         * @throws UnknownTokenException propagated from the inherited final check
         */
        @Override
        protected void finalCheck(ElementStartNode root) throws UnknownTokenException {
                super.finalCheck(root);

                if (!root.hasLinkedNode()) {
                        return;
                }

                Node cursor = root.getLinkedNode();
                // remove redundant start node
                if (!cursor.hasNext() && cursor instanceof ElementStartNode) {
                        cursor = ((ElementStartNode) cursor).getLinkedNode();
                        root.setLinkedNode(cursor);
                }

                // remove braces
                for (; cursor != null; cursor = cursor.next()) {
                        if (!(cursor instanceof Element)) {
                                continue;
                        }
                        Element element = (Element) cursor;
                        boolean isBrace = element.getContent().equals("{")
                                || element.getContent().equals("}");
                        if (isBrace) {
                                removeBraces(root, element);
                        }
                }
        }

        /**
         * Unlinks the given element from its neighbours.
         * <p>
         * The predecessor, when there is one, is pointed at the element's successor. When there
         * is no predecessor, the root's linked node is moved to the successor only if the
         * element's content is <code>{</code>; otherwise the root is left untouched. The
         * successor, when there is one, is pointed back at the element's predecessor.
         *
         * @param root root node whose linked node may have to be moved
         * @param n    the element to unlink
         */
        private void removeBraces(ElementStartNode root, Element n) {
                if (!n.hasPrevious()) {
                        // the element is at the head of the chain
                        if (n.getContent().equals("{")) {
                                root.setLinkedNode(n.next());
                        }
                } else {
                        n.previous().setNext(n.next());
                }

                if (!n.hasNext()) {
                        return;
                }
                n.next().setPrevious(n.previous());
        }
}