All downloads are free. Search and download functionality uses the official Maven repository.

net.diversionmc.parser.Parser Maven / Gradle / Ivy

There is a newer version: 1.1.0
Show newest version
package net.diversionmc.parser;

import net.diversionmc.parser.expression.ExpressionPiece;
import net.diversionmc.parser.expression.PiecePredicate;
import net.diversionmc.parser.expression.PieceSupplier;
import net.diversionmc.parser.group.GroupSupplier;
import net.diversionmc.parser.group.Grouper;
import net.diversionmc.parser.pattern.ParsePattern;
import net.diversionmc.parser.pattern.Sentence;
import net.diversionmc.parser.util.FilePointer;

import java.io.*;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Predicate;

import static java.lang.Math.max;
import static java.util.stream.Collectors.toList;
import static net.diversionmc.parser.expression.PieceResult.*;
import static net.diversionmc.parser.util.ParserException.ASSERT;

/**
 * Parser - convert any written language into custom format.
 * <p>
 * {@link #build()} works in three stages: the input text is split into expression pieces using the
 * registered piece suppliers, the pieces are nested into groups using the registered groupers, and the
 * grouped pieces are handed to {@code ParsePattern.match} together with the registered patterns.
 * <p>
 * NOTE(review): this copy of the file appears to have lost everything that was written between angle
 * brackets - generic type arguments (e.g. {@code List> groups}) and the names of type-parameter tags in
 * the Javadoc. The original class Javadoc documented a type parameter described as
 * "Parsed text output sentence type". Damaged declarations are kept as found; restore them from the
 * upstream source before compiling.
 *
 * @author Kirill Semyonkin - Diversion Network 2021
 */
public final class Parser {
    //
    // Boilerplate
    //

    /** Title used for file pointers; this object's hash code until a name is supplied. */
    private String name = hashCode() + "";
    /** Input text with one extra trailing newline appended; null until a text is set. */
    private String text = null;

    // NOTE(review): element/value types below look stripped by extraction
    //  (presumably ParsePiece, a parameterized Grouper, and String -> parameterized ParsePattern) - confirm
    private final List pieces = new LinkedList<>();
    private final List> groups = new LinkedList<>();
    private final Map> patterns = new LinkedHashMap<>();

    /** Optional callback invoked by {@link #build()} with every finished piece. */
    private Consumer pieceFinish = null;
    /** Optional action run by {@link #build()} before parsing starts. */
    private Runnable pre = null;
    /** Pointer computed by {@link #text(String)}; see {@link #end()}. */
    private FilePointer end;

    /**
     * A registered piece: a predicate deciding whether the piece may start at a character,
     * paired with the supplier that creates it.
     */
    private record ParsePiece(PiecePredicate check, PieceSupplier supplier) {
        /** Ask the predicate whether this piece may start with {@code c} at {@code ptr}. */
        public boolean check(char c, FilePointer ptr) {
            return check.apply(c, ptr);
        }

        /** Ask the supplier to create the piece starting with {@code c} at {@code ptr}. */
        public ExpressionPiece apply(char c, FilePointer ptr) {
            return supplier.apply(c, ptr);
        }
    }

    /**
     * Create Parser without any input attached and automatically set name.
     */
    public Parser() {
    }

    /**
     * Create Parser with a title.
     *
     * @param name Text title; usually filename.
     */
    public Parser(String name) {
        this.name = name;
    }

    /**
     * Create Parser of a text with a title.
     *
     * @param name Text title; usually filename.
     * @param text Input text to parse.
     */
    public Parser(String name, String text) {
        this(name);
        text(text);
    }

    /**
     * Create Parser from an input stream with a title.
     *
     * @param name Text title; usually filename.
     * @param is   Input stream to get text from to parse.
     * @throws IOException If stream reading error occurs.
     */
    public Parser(String name, InputStream is) throws IOException {
        this(name);
        readFrom(is);
    }

    /**
     * Create Parser from a file. The title is taken from the file name, see {@link #readFrom(File)}.
     *
     * @param f File to parse.
     * @throws IOException If file reading error occurs.
     */
    public Parser(File f) throws IOException {
        readFrom(f);
    }

    /**
     * Create Parser without any input attached and automatically set name.
     * <p>
     * NOTE(review): the type-parameter declaration of this and the other static factories is missing in
     * this copy; the original Javadoc described it as "Parsed text output sentence type".
     *
     * @return {@link #Parser()}
     */
    public static Parser parser() {
        return new Parser<>();
    }

    /**
     * Create Parser with a title.
     *
     * @param name Text title; usually filename.
     * @return {@link #Parser(String)}
     */
    public static Parser parser(String name) {
        return new Parser<>(name);
    }

    /**
     * Create Parser of a text with a title.
     *
     * @param name Text title; usually filename.
     * @param text Input text to parse.
     * @return {@link #Parser(String, String)}
     */
    public static Parser parser(String name, String text) {
        return new Parser<>(name, text);
    }

    /**
     * Create Parser from an input stream with a title.
     *
     * @param name Text title; usually filename.
     * @param is   Input stream to get text from to parse.
     * @return {@link #Parser(String, InputStream)}
     * @throws IOException If stream reading error occurs.
     */
    public static Parser parser(String name, InputStream is) throws IOException {
        return new Parser<>(name, is);
    }

    /**
     * Create Parser from a file.
     *
     * @param f File to parse.
     * @return {@link #Parser(File)}
     * @throws IOException If file reading error occurs.
     */
    public static Parser parser(File f) throws IOException {
        return new Parser<>(f);
    }

    /**
     * Get title used on creation of this parser. If title was not specified, it is {@link #hashCode()}.
     *
     * @return Text title; usually filename.
     */
    public String name() {
        return name;
    }

    /**
     * Get text that was inputted into this Parser.
     *
     * @return Input text to parse with one trailing newline appended, or null if no text was set yet.
     */
    public String text() {
        return text;
    }

    /**
     * Set text to parse. A newline is appended to the stored text, and the {@link #end()} pointer is
     * recomputed using the current {@link #name()}.
     *
     * @param text Input text to parse, must not be null.
     * @return This parser, for chaining.
     */
    public Parser text(String text) {
        ASSERT(text != null, "Parser text is null");
        this.text = text + '\n';
        // String.split drops trailing empty strings, so trailing blank lines are not counted;
        //  max() keeps the line at 1 for empty or newline-only text
        end = new FilePointer(name, max(1, text.split("\n").length), 1);
        return this;
    }

    /**
     * Set text to parse from a file, as well as text title. The title becomes the file name without its
     * last extension. A file that does not exist is treated as empty input.
     *
     * @param f File to parse, must not be null.
     * @return This parser, for chaining.
     * @throws IOException If file reading error occurs.
     */
    public Parser readFrom(File f) throws IOException {
        ASSERT(f != null, "Parser file is null");

        var content = "\n"; // what readAll() yields for input without any lines
        if (f.exists()) {
            // NOTE(review): no charset is given, so the platform default is used - confirm this is intended
            try (var br = new BufferedReader(new InputStreamReader(new FileInputStream(f)))) {
                content = readAll(br);
            }
        }

        // The name has to be updated before text(), which bakes it into the end pointer
        var n = f.getName();
        int i = n.lastIndexOf('.');
        this.name = i < 0 ? n : n.substring(0, i);
        text(content);

        return this;
    }

    /**
     * Set text to parse from a stream. The stream is read until its end but is not closed.
     *
     * @param is Input stream to get text from to parse, must not be null.
     * @return This parser, for chaining.
     * @throws IOException If stream reading error occurs.
     */
    public Parser readFrom(InputStream is) throws IOException {
        ASSERT(is != null, "Parser stream is null");

        var br = new BufferedReader(new InputStreamReader(is)); // do not close foreign stream
        text(readAll(br));
        return this;
    }

    /**
     * Read every line from the reader, ending each with a single {@code '\n'}, and append one more
     * {@code '\n'} after the last line.
     */
    private static String readAll(BufferedReader reader) throws IOException {
        var s = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) s.append(line).append('\n');
        return s.append('\n').toString();
    }

    //
    // API
    //

    /**
     * Get the file pointer computed by the last {@link #text(String)} call: column 1 of the line whose
     * number equals the count of newline-separated lines in the given text (trailing empty lines not
     * counted, never less than 1).
     *
     * @return End file pointer, or null if no text was set yet.
     */
    public FilePointer end() {
        return end;
    }

    /**
     * Set the first action to perform on {@link #build()} before starting to parse text.
     * The action is not run when the text is blank.
     *
     * @param pre Action to run.
     * @return This parser, for chaining.
     */
    public Parser pre(Runnable pre) {
        this.pre = pre;
        return this;
    }

    /**
     * Add a parse piece that is always accepted.
     *
     * @param supplier Piece supplier, must not be null; returning null from it is the same as not
     *                 accepting a piece.
     * @return This parser, for chaining.
     */
    public Parser piece(PieceSupplier supplier) {
        return piece((c, ptr) -> true, supplier);
    }

    /**
     * Add a parse piece.
     *
     * @param check    Check whether the piece is acceptable for a character at a position, must not be null.
     * @param supplier Piece supplier, must not be null; returning null from it is the same as not
     *                 accepting a piece.
     * @return This parser, for chaining.
     */
    public Parser piece(PiecePredicate check, PieceSupplier supplier) {
        // Fail at registration time instead of with a NullPointerException in the middle of build()
        ASSERT(check != null, "Parser piece check is null");
        ASSERT(supplier != null, "Parser piece supplier is null");
        pieces.add(new ParsePiece(check, supplier));
        return this;
    }

    /**
     * Set the action to run after a piece is completed. Only one action is kept; a later call replaces
     * the earlier one.
     *
     * @param event Action to run, receives the finished piece.
     * @return This parser, for chaining.
     */
    public Parser pieceFinish(Consumer event) {
        pieceFinish = event;
        return this;
    }

    /**
     * Create a grouper for specific pieces.
     * <p>
     * NOTE(review): the original Javadoc documented two type parameters ("Opening piece type" and
     * "Closing piece type"); their declarations are missing in this copy.
     *
     * @param left     Group opening piece.
     * @param right    Group closing piece.
     * @param supplier Group creator.
     * @return This parser, for chaining.
     */
    public Parser group(Predicate left,
                        Predicate right,
                        GroupSupplier supplier) {
        groups.add(new Grouper<>(left, right, supplier));
        return this;
    }

    /**
     * Add a piece to sentence converter. Adding a pattern under an existing ID replaces the previous one.
     *
     * @param id      Name of the pattern, must not be null or blank.
     * @param pattern The pattern.
     * @return This parser, for chaining.
     */
    public Parser pattern(String id, ParsePattern pattern) {
        ASSERT(id != null && !id.isBlank(), "Pattern ID cannot be null or empty");
        patterns.put(id, pattern);
        return this;
    }

    /**
     * Get a pattern that was already added to this parser.
     *
     * @param id Name of the pattern.
     * @return The pattern, or null if nothing is registered under that ID.
     */
    public ParsePattern pattern(String id) {
        return patterns.get(id);
    }

    /**
     * Get all patterns that were added to this parser, in the order they were first added.
     *
     * @return Array of the patterns to use in matchOne().
     */
    @SuppressWarnings("unchecked")
    public ParsePattern[] patterns() {
        return patterns.values().toArray(ParsePattern[]::new);
    }

    /**
     * Convert input text into a list of usable sentences.
     * <p>
     * Fails an assertion if no text was set, if a non-whitespace character is accepted by no piece
     * ("Invalid symbol") or by more than one ("Ambiguous symbol"), if the last piece is still open when
     * the text ends, or if groups are unbalanced.
     *
     * @return List of sentences; an empty list if the text is blank.
     */
    public List build() {
        // Previously a parser without text failed here with a bare NullPointerException
        ASSERT(text != null, "Parser text is null");
        if (text.isBlank()) return Collections.emptyList();
        if (pre != null) pre.run();

        // Convert text into expressions; group() reads this list by index, hence an array-backed list
        List expressions = new ArrayList<>();
        ExpressionPiece last = null;
        FilePointer ptr = null;

        int pos = 0, len = text.length(),
            line = 1, col = 1;
        while (pos < len) {
            ptr = new FilePointer(name, line, col);
            char c = text.charAt(pos);

            // Read to current character
            if (last != null) {
                var r = last.read(c, ptr); // null is same as TAKE

                // LEAVE and REPLACE_LEAVE do not use up a character,
                //  TAKE, REPLACE_TAKE and CONTINUE do
                if (r != LEAVE && r != REPLACE_LEAVE) {
                    pos++;
                    if (c == '\n') {
                        line++;
                        col = 1;
                    } else col++;
                }

                // REPLACE_LEAVE and REPLACE_TAKE end previous expression, null is allowed
                if (r == REPLACE_LEAVE || r == REPLACE_TAKE) last = last.replace(ptr);
                    // CONTINUE does not end an expression
                else if (r != CONTINUE) {
                    if (pieceFinish != null) pieceFinish.accept(last);
                    expressions.add(last);
                    last = null;
                }
                continue;
            }

            // Skip dangling whitespaces
            if (Character.isWhitespace(c)) {
                pos++;
                if (c == '\n') {
                    line++;
                    col = 1;
                } else col++;
                continue;
            }

            // Find new piece; the character is not consumed here, the new piece reads it on the next pass
            FilePointer finalPtr = ptr;
            var found = pieces.stream()
                .filter(piece -> piece.check(c, finalPtr))
                .map(piece -> piece.apply(c, finalPtr))
                .filter(Objects::nonNull)
                .collect(toList());
            ASSERT(!found.isEmpty(), () -> finalPtr, "Invalid symbol '" + c + "'");
            ASSERT(found.size() == 1, () -> finalPtr, "Ambiguous symbol '" + c + "'");
            last = found.get(0);
        }

        FilePointer finalPtr = ptr;
        ASSERT(last == null, () -> finalPtr, "Expression did not end (last: " + last + ")");

        // Convert expressions into sentences
        return ParsePattern.match(group(expressions), patterns());
    }

    /**
     * Nest pieces into groups using the registered groupers. Pieces outside of any group are copied to the
     * result as they are; every outermost opening/closing pair is replaced with the piece created by its
     * grouper, whose content is grouped recursively.
     *
     * @param content Flat list of pieces.
     * @return New list with outermost groups collapsed into single pieces.
     */
    private List group(List content) {
        var result = new LinkedList();
        if (content.isEmpty()) return result;

        int start = 0, // index of outer group start (when depth turned from 0 to 1)
            depth = 0; // inner group depth, 0 = no group, 1 = outer group, 2... = inner groups
        for (int i = 0, limit = content.size(); i < limit; i++) {
            var current = content.get(i);
            for (var grouper : groups)
                if (grouper.start(current)) { // current is startPiece, endPiece comes later
                    // Only openers of the same kind as the outer group change depth
                    if ((depth == 0 || grouper.start(content.get(start)))
                        && depth++ == 0) start = i;
                    break;
                } else if (grouper.end(current)) { // current is endPiece, content.get(start) is startPiece
                    // A closing piece outside of any group has no opener to pair with. Without this check
                    //  it was paired with whatever content.get(start) happened to be, producing a bogus
                    //  group or an IllegalArgumentException from subList()
                    ASSERT(depth > 0, current::pointer, "unbalanced group");
                    var startPiece = content.get(start);
                    if (grouper.start(startPiece)) depth--;
                    if (depth == 0) // outer group ended
                        current = grouper.group( // will have result.add(current) called later
                            startPiece, current,
                            group(content.subList(start + 1, i))); // all pieces will be inside the group
                    break;
                }
            if (depth == 0) result.add(current); // add piece to result if we are not in a group (anymore)
        }

        int finalStart = start;
        ASSERT(depth == 0, () -> content.get(finalStart).pointer(), "unbalanced group");

        return result;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy