All downloads are free. The search and download functionality uses the official Maven repository.

lt.compiler.AbstractScanner Maven / Gradle / Ivy

Go to download

The latte-lang compiler project, which contains the compiler and the runtime library it requires.

The newest version!
package lt.compiler;

import lt.compiler.lexical.*;
import lt.compiler.syntactic.UnknownTokenException;

import java.io.IOException;
import java.io.Reader;
import java.util.*;

/**
 * the base scanner.
 */
public abstract class AbstractScanner implements Scanner {
        /**
         * the scanner creates a new Layer when meets these strings
* e.g.
*
         * lambda = ()-> a+b
         * 
* would be considered as the following token tree
*
         * [lambda]-[=]-[(]-[)]-[->]-[|]-[END]
         *                            |
         *                            +-[a]-[+]-[b]
         * 
*/ public final Set LAYER = new HashSet(Arrays.asList("->", "=>")); /** * the input should be split when meets these tokens */ public final Set SPLIT_X = new HashSet(Arrays.asList( ".", // class positioning or method access ":", // type specification or generic extends "::", // package::package "=", // assignment "^^", // pow "!", "&&", "||", // logic "!=", "==", "!==", "===", // equals/reference equals "<", ">", "<=", ">=", // comparison or generic extends/super "+", "-", "*", "/", "%", // operators "++", "--", "@", // @Annotation "...", // pass ":::", // concat ":=", // assign "#", // generator "<-" // pattern matching destructing )); public final Set SPLIT_TWO_VAR_OP_THAT_CAN_BE_USED_WITH_ASSIGN = new HashSet(Arrays.asList( "+", "-", "*", "/", "%", "<<", ">>", ">>>", // shift "&", "^", "|", "~" // bit logic )); /** * symbols that let the scanner know the following input should be scanned as a string
* a string starts with one of these symbols and ends with the same symbol. */ public final Set STRING = new HashSet(Arrays.asList("\"", "'", "`")); /** * the escape character. */ public static String ESCAPE = "\\"; /** * these tokens split the input, but them themselves won't be recorded into the token tree. e.g.
*
         * list add 1
         * 
* the spaces split the input, but they won't be recorded. */ public final Set NO_RECORD = new HashSet(Collections.singletonList(" ")); /** * the str is considered as ending text. Append an EndingNode to the token tree */ public static Set ENDING = new HashSet(Arrays.asList(",", ";")); /** * comment, strings after the token are ignored */ public static String COMMENT = "//"; /** * multiple line comment start symbol */ public static String MultipleLineCommentStart = "/*"; /** * multiple line comment end symbol */ public static String MultipleLineCommentEnd = "*/"; /** * the scanner creates a new layer when meets a key and the layer finishes at corresponding value
*
         * map = {'name':'cass'}
         * 
* would be considered as the following token tree
*
         * [map]-[=]-[{]-[|]-[}]-[END]
         *                |
         *                --['name']-[:]-['cass']
         * 
*/ public final Map PAIR = new HashMap(); /** * the input should be split when meets these tokens */ public final List SPLIT; { PAIR.put("(", ")"); // arguments/procedures/expressions PAIR.put("{", "}"); // map PAIR.put("[", "]"); // array[index] PAIR.put("<:", ":>"); // generic SPLIT_X.addAll(NO_RECORD); SPLIT_X.addAll(SPLIT_TWO_VAR_OP_THAT_CAN_BE_USED_WITH_ASSIGN); for (String s : SPLIT_TWO_VAR_OP_THAT_CAN_BE_USED_WITH_ASSIGN) { SPLIT_X.add(s + "="); } Set set = new HashSet(); set.addAll(LAYER); set.addAll(SPLIT_X); set.addAll(ENDING); set.add(COMMENT); set.add(MultipleLineCommentStart); set.add(MultipleLineCommentEnd); set.addAll(PAIR.keySet()); set.addAll(PAIR.values()); // the longest string is considered first SPLIT = new ArrayList(set); Collections.sort(SPLIT, new Comparator() { @Override public int compare(String a, String b) { return b.length() - a.length(); } }); SPLIT.addAll(0, STRING); } protected final String fileName; protected final PushLineBackReader reader; protected final Properties properties; protected final ErrorManager err; /** * initiate the processor with a reader * * @param fileName the input text file name * @param reader text reader * @param properties properties for the Scanner * @param err error manager */ public AbstractScanner(String fileName, Reader reader, Properties properties, ErrorManager err) { this.fileName = fileName; this.properties = properties; this.err = err; if (reader instanceof PushLineBackReader) { this.reader = (PushLineBackReader) reader; } else { this.reader = new PushLineBackReader(reader); } } /** * scan the text and generate a token tree. 
* * @return start node * @throws IOException exception when reading the text * @throws SyntaxException exception when meets a syntax error */ @Override public ElementStartNode scan() throws IOException, SyntaxException { Args args = new Args(); args.fileName = fileName; ElementStartNode elementStartNode = new ElementStartNode(args, new Indent(0)); args.startNodeStack.push(elementStartNode); args.currentLine = properties._LINE_BASE_; scan(args); finalCheck(elementStartNode); return elementStartNode; } protected abstract void scan(Args args) throws IOException, SyntaxException; /** * remove useless EndingNode and useless StartNode
* join double literal
* remove `` from the valid names * * @param root root node * @throws UnknownTokenException the token is unknown */ protected void finalCheck(ElementStartNode root) throws UnknownTokenException { if (root.hasLinkedNode()) { Node n = root.getLinkedNode(); while (n != null) { if (n instanceof ElementStartNode) { finalCheck((ElementStartNode) n); } if (n instanceof EndingNode && (!n.hasNext() || !(n.next() instanceof Element))) { if (n.hasPrevious()) { n.previous().setNext(n.next()); } if (n.hasNext()) { n.next().setPrevious(n.previous()); } } else if (n instanceof Element) { ((Element) n).checkWhetherIsValidName(); if (((Element) n).getContent().equals(".") && n.hasPrevious() && n.hasNext() && n.previous() instanceof Element && n.next() instanceof Element && CompileUtil.isNumber(((Element) n.previous()).getContent()) && CompileUtil.isNumber(((Element) n.next()).getContent()) && !((Element) n.previous()).getContent().contains(".") && !((Element) n.next()).getContent().contains(".")) { Element pre = (Element) n.previous(); Element ne = (Element) n.next(); String s = pre.getContent() + "." 
+ ne.getContent(); Element element = new Element(new Args(), s, getTokenType(s, pre.getLineCol())); element.setLineCol(pre.getLineCol()); element.setPrevious(pre.previous()); element.setNext(ne.next()); element.getLineCol().length = s.length(); if (element.hasPrevious()) { element.previous().setNext(element); } else { root.setLinkedNode(element); } if (element.hasNext()) { element.next().setPrevious(element); } } } n = n.next(); } n = root.getLinkedNode(); while (n != null) { if (n instanceof ElementStartNode && n.hasNext() && !(n.next() instanceof EndingNode)) { Node next = n.next(); Args args = new Args(); args.previous = n; args.currentLine = n.getLineCol().line; args.currentCol = n.getLineCol().column; EndingNode endingNode = new EndingNode(args, EndingNode.SYNTHETIC); endingNode.setNext(next); next.setPrevious(endingNode); } n = n.next(); } } else { if (root.hasPrevious()) { root.previous().setNext(root.next()); } if (root.hasNext()) { root.next().setPrevious(root.previous()); } } } /** * @param str the token to check type * @param lineCol line column file * @return TokenType or null if it's an unknown token * @throws UnknownTokenException exception */ protected final TokenType getTokenType(String str, LineCol lineCol) throws UnknownTokenException { if (CompileUtil.isBoolean(str)) return TokenType.BOOL; if (CompileUtil.isModifier(str)) return TokenType.MODIFIER; if (CompileUtil.isNumber(str)) return TokenType.NUMBER; if (CompileUtil.isString(str)) return TokenType.STRING; if (CompileUtil.isKey(str)) return TokenType.KEY; // however in/is/not are two variable operators, they are marked as keys if (CompileUtil.isSymbol(str)) return TokenType.SYMBOL; if (SPLIT.contains(str)) return TokenType.SYMBOL; if (CompileUtil.isValidName(str)) return TokenType.VALID_NAME; err.UnknownTokenException(str, lineCol); // ignore the token, and return null return null; } /** * create an {@link ElementStartNode} * * @param args args context * @param indentation indentation of the 
new element start node */ protected final void createStartNode(Args args, int indentation) { ElementStartNode elementStartNode = new ElementStartNode(args, new Indent(indentation)); args.previous = null; args.startNodeStack.push(elementStartNode); } /** * check whether it's the string end. count the `\`, check whether it%2==0 * * @param line the line * @param index index of `\` * @return true/false */ protected final boolean checkStringEnd(String line, int index) { int count = 0; char[] arr = line.toCharArray(); for (int i = index; i > 0; --i) { char c = arr[i]; if (c == '\\') ++count; else break; } return count % 2 == 0; } /** * pop one or more nodes from {@link Args#startNodeStack}, the last popped node's indentation should be the same as required indent * * @param args args context * @param indent required indentation * @throws UnexpectedTokenException compiling error */ protected final void redirectToPairStart(Args args, Indent indent) throws SyntaxException { if (args.startNodeStack.empty()) { err.SyntaxException("possibly incorrect indentation or mismatched brackets", args.generateLineCol()); return; } ElementStartNode startNode = args.startNodeStack.pop(); // use `==`, compare the reference if (startNode.getIndent() == indent) { if (startNode.hasNext()) { throw new LtBug("startNode in this step should never have nodes appended"); } // do redirect args.previous = startNode; } else { if ((startNode.getIndent().getIndent() != Indent.FLEX && startNode.getIndent().getIndent() < indent.getIndent()) || args.startNodeStack.empty()) { err.SyntaxException("possibly incorrect indentation or mismatched brackets", args.generateLineCol()); return; } redirectToPairStart(args, indent); } } protected final void redirectToDeeperStartNodeByIndent(Args args, int indent, boolean newLine) throws SyntaxException { if (args.startNodeStack.empty()) { throw new LtBug("this should never happen"); } ElementStartNode requiredNode = args.startNodeStack.pop(); if 
(args.startNodeStack.empty()) { throw new LtBug("this should never happen"); } ElementStartNode startNode = args.startNodeStack.lastElement(); if (startNode.getIndent().getIndent() == indent) { if (startNode.hasNext()) { throw new LtBug("startNode in this step should never have nodes appended"); } // do redirect args.previous = requiredNode; if (newLine) { args.previous = new EndingNode(args, EndingNode.WEAK); } } else { if ((startNode.getIndent().getIndent() != Indent.FLEX && startNode.getIndent().getIndent() < indent) || args.startNodeStack.empty()) { err.SyntaxException("possibly incorrect indentation or mismatched brackets", args.generateLineCol()); return; } redirectToDeeperStartNodeByIndent(args, indent, newLine); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy