All Downloads are FREE. Search and download functionalities are using the official Maven repository.

lt.compiler.BraceScanner Maven / Gradle / Ivy

Go to download

The latte-lang compiler project, which contains the compiler and the libraries required at runtime.

The newest version!
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 KuiGang Wang
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package lt.compiler;

import lt.compiler.lexical.*;
import lt.compiler.syntactic.UnknownTokenException;

import java.io.IOException;
import java.io.Reader;
import java.util.*;

/**
 * scan text which is formed up by braces.
*/ public class BraceScanner extends AbstractScanner { /** * initiate the processor with a reader * * @param fileName the input text file name * @param reader text reader * @param properties properties for the Scanner * @param err error manager */ public BraceScanner(String fileName, Reader reader, Properties properties, ErrorManager err) { super(fileName, reader, properties, err); init(); } private void init() { SPLIT_X.addAll(LAYER); LAYER.clear(); Set set = new HashSet(); set.addAll(LAYER); set.addAll(SPLIT_X); set.addAll(ENDING); set.add(COMMENT); set.add(MultipleLineCommentStart); set.add(MultipleLineCommentEnd); set.addAll(PAIR.keySet()); set.addAll(PAIR.values()); SPLIT.clear(); // the longest string is considered first List tmpList = new ArrayList(set); Collections.sort(tmpList, new Comparator() { @Override public int compare(String a, String b) { return b.length() - a.length(); } }); SPLIT.addAll(tmpList); SPLIT.addAll(0, STRING); } @Override public ElementStartNode scan() throws IOException, SyntaxException { ElementStartNode root = super.scan(); finalCheck(root); return root; } @Override protected void scan(Args args) throws IOException, SyntaxException { String line = reader.readLine(); while (line != null) { ++args.currentLine; err.putLineRecord(args.fileName, args.currentLine, line); args.currentCol = properties._COLUMN_BASE_; args.useDefine.clear(); if (args.multipleLineComment) { if (!line.contains(MultipleLineCommentEnd)) { line = reader.readLine(); continue; } else { int subCol = line.indexOf(MultipleLineCommentEnd) + MultipleLineCommentEnd.length(); line = line.substring(subCol); args.currentCol += (subCol + 1); args.multipleLineComment = false; } } // the line is nothing but comment if (line.trim().startsWith(COMMENT)) { line = reader.readLine(); continue; } int COMMENT_index = line.indexOf(COMMENT); if (COMMENT_index != -1) { String pre = line.substring(0, COMMENT_index); String post = line.substring(COMMENT_index); for (Map.Entry definedEntry 
: args.defined.entrySet()) { String tmp = pre; pre = pre.replace(definedEntry.getKey(), definedEntry.getValue()); if (!tmp.equals(pre)) { args.useDefine.put(definedEntry.getKey(), definedEntry.getValue()); } } line = pre + post; } else { for (Map.Entry definedEntry : args.defined.entrySet()) { String tmp = line; line = line.replace(definedEntry.getKey(), definedEntry.getValue()); if (!tmp.equals(line)) { args.useDefine.put(definedEntry.getKey(), definedEntry.getValue()); } } } // get front spaces int spaces = 0; for (int i = 0; i < line.length(); ++i) { if (line.charAt(i) != ' ') { spaces = i; break; } } if (args.currentCol == properties._COLUMN_BASE_) { args.currentCol += spaces + 1; } // remove front and end spaces line = line.trim(); // check it's an empty line if (line.isEmpty()) { line = reader.readLine(); continue; } // start parsing scan(line, args); if (!args.multipleLineComment) { if (args.previous instanceof Element) { args.previous = new EndingNode(args, EndingNode.WEAK); } } line = reader.readLine(); } } /** * when the given line is not empty, do scanning.
*
    *
  1. check whether the line contains tokens in {@link #SPLIT}
    * get the most front token, if several tokens are at the same position, then choose the longest one
  2. *
  3. if the token not found, consider the whole line as one element and append to previous node
  4. *
  5. else
  6. *
  7. record text before the token as an element and do appending
  8. *
  9. check which category the token is in
    *
      *
    • {@link #LAYER} means starts after recording the token, a new ElementStartNode should be started, invoke {@link #createStartNode(Args, int)}
    • *
    • {@link #SPLIT_X}: the previous element and next element should be in the same layer. if it's also among {@link #NO_RECORD}, the token won't be recorded
    • *
    • {@link #STRING}: the next element is a string or a character. these characters should be considered as one element
    • *
    • {@link #ENDING}: append a new {@link EndingNode} to prevent generated nodes being ambiguous. NOTE that it only means the end of an expression or a statement. not the parsing process
    • *
    • {@link #COMMENT}: it's the start of a comment. the following chars would be ignored
    • *
    • {@link #PAIR}: for keys, it will start a new layer. for values, it will end the layer created by the key
    • *
    *
  10. *

* here's an example of how the method works
* given the following input:
*
         * val map={'name':'cass'}
         * 
*
    *
  1. the most front and longest token is ' ', and ' ' is in {@link #NO_RECORD} ::: val/map={'name':'cass'}
  2. *
  3. the most front and longest token is '=' ::: val/map/=/{'name':'cass'}
  4. *
  5. the most front and longest token is '{', and '{' is a key of {@link #PAIR} ::: val/map/=/{/(LAYER-START/'name':'cass'})
  6. *
  7. the most front and longest token is "'", and "'" is in {@link #STRING} ::: val/map/=/{/(LAYER-START/'name'/:'cass'})
  8. *
  9. the most front and longest token is ':' ::: val/map/=/{/(LAYER-START/'name'/:/'cass'})
  10. *
  11. the most front and longest token is "'", and "'" is in {@link #STRING} ::: val/map/=/{/(LAYER-START/'name'/:/'cass'/})
  12. *
  13. the most front and longest token is "}", and '}' is a value of {@link #PAIR} ::: val/map/=/{/(LAYER-START/'name'/:/'cass')}
  14. *

* the result is val/map/=/{/(LAYER-START/'name'/:/'cass')}
* set a breakpoint in the method and focus on variable line, you will get exactly the same intermediate results * * @param line line to parse * @param args args context * @throws SyntaxException syntax exceptions, including {@link SyntaxException}, {@link UnexpectedTokenException} */ private void scan(String line, Args args) throws SyntaxException { if (line.isEmpty()) return; // check multiple line comment if (args.multipleLineComment) { if (line.contains(MultipleLineCommentEnd)) { int subCol = line.indexOf(MultipleLineCommentEnd) + MultipleLineCommentEnd.length(); args.currentCol += subCol; line = line.substring(subCol); args.multipleLineComment = false; } else { return; } } // check SPLIT // find the pattern at minimum location index and with longest words int minIndex = line.length(); String token = null; // recorded token for (String s : SPLIT) { if (line.contains(s)) { int index = line.indexOf(s); if (index != -1 && index < minIndex) { minIndex = index; token = s; } } } if (token == null) { if (!line.isEmpty()) { // not found, simply append whole input to previous TokenType type = getTokenType(line, args.generateLineCol()); if (type != null) { // unknown token, ignore this token args.previous = new Element(args, line, type); args.currentCol += line.length(); } } } else { String copyOfLine = line; String str = line.substring(0, minIndex); if (!str.isEmpty()) { // record text before the token TokenType type = getTokenType(str, args.generateLineCol()); if (type != null) { args.previous = new Element(args, str, type); } args.currentCol += str.length(); } if (LAYER.contains(token)) { // start new layer args.previous = new Element(args, token, getTokenType(token, args.generateLineCol())); createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4); } else if (SPLIT_X.contains(token)) { // do split check if (!NO_RECORD.contains(token)) { // record this token args.previous = new Element(args, token, getTokenType(token, 
args.generateLineCol())); } } else if (STRING.contains(token)) { // string literal int lastIndex = minIndex; while (true) { int index = line.indexOf(token, lastIndex + token.length()); if (token.equals("//")) { while (line.length() > index + 2) { if (line.charAt(index + 2) == '/') { ++index; } else { break; } } } if (line.length() <= 1 || index == -1) { err.SyntaxException("end of string not found", args.generateLineCol()); // assume that the end is line end err.debug("assume that the " + token + " end is line end"); String generated = line.substring(minIndex) + token; args.previous = new Element(args, generated, getTokenType(generated, args.generateLineCol())); args.currentCol += (index - minIndex) - token.length(); // the length would be added in later steps line = line.substring(index + 1); break; } else { String c = String.valueOf(line.charAt(index - 1)); // check boolean isStringEnd = !ESCAPE.equals(c) || checkStringEnd(line, index - 1); if (isStringEnd) { // the string starts at minIndex and ends at index String s = line.substring(minIndex, index + token.length()); args.previous = new Element(args, s, getTokenType(s, args.generateLineCol())); args.currentCol += (index - minIndex); line = line.substring(index + token.length()); break; } lastIndex = index; } } } else if (ENDING.contains(token)) { // ending if (args.previous instanceof Element) { args.previous = new EndingNode(args, EndingNode.STRONG); } } else if (COMMENT.equals(token)) { // comment line = ""; // ignore all } else if (PAIR.containsKey(token)) { // pair start args.previous = new Element(args, token, getTokenType(token, args.generateLineCol())); createStartNode(args, args.startNodeStack.lastElement().getIndent().getIndent() + 4); args.pairEntryStack.push(new PairEntry(token, args.startNodeStack.lastElement())); } else if (PAIR.containsValue(token)) { // pair end PairEntry entry = args.pairEntryStack.pop(); String start = entry.key; if (!token.equals(PAIR.get(start))) { 
err.UnexpectedTokenException(PAIR.get(start), token, args.generateLineCol()); // assume that the pair ends err.debug("assume that the pair ends"); } ElementStartNode startNode = entry.startNode; if (startNode.hasNext()) { err.SyntaxException( "indentation of " + startNode.next() + " should be " + startNode.getIndent(), startNode.next().getLineCol()); // fill the LinkedNode with all nodes after the startNode Node n = startNode.next(); n.setPrevious(null); startNode.setNext(null); startNode.setLinkedNode(n); } if (args.startNodeStack.lastElement().getIndent().getIndent() >= startNode.getIndent().getIndent()) { redirectToPairStart(args, startNode.getIndent()); } else { args.previous = startNode; } args.previous = new Element(args, PAIR.get(start), getTokenType(token, args.generateLineCol())); } else if (token.equals(MultipleLineCommentStart)) { if (!args.multipleLineComment) { args.multipleLineComment = true; } } else { err.UnknownTokenException(token, args.generateLineCol()); // unknown token // simply ignore the token } // column args.currentCol += token.length(); if (copyOfLine.equals(line)) { // line hasn't changed, do default modification line = line.substring(minIndex + token.length()); } // recursively parse scan(line, args); } } @Override protected void finalCheck(ElementStartNode root) throws UnknownTokenException { super.finalCheck(root); if (root.hasLinkedNode()) { Node n = root.getLinkedNode(); // remove redundant start node if (!n.hasNext() && n instanceof ElementStartNode) { Node newN = ((ElementStartNode) n).getLinkedNode(); root.setLinkedNode(newN); n = newN; } // remove braces while (n != null) { if (n instanceof Element) { if (((Element) n).getContent().equals("{") || ((Element) n).getContent().equals("}")) { removeBraces(root, (Element) n); } } n = n.next(); } } } private void removeBraces(ElementStartNode root, Element n) { if (n.hasPrevious()) { n.previous().setNext(n.next()); } else if (n.getContent().equals("{")) { root.setLinkedNode(n.next()); 
} if (n.hasNext()) { n.next().setPrevious(n.previous()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy