All Downloads are FREE. Search and download functionalities are using the official Maven repository.

persistence.antlr.DocBookCodeGenerator Maven / Gradle / Ivy

There is a newer version: 2.1-60f
Show newest version
package persistence.antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 */

/** TODO: strip comments from javadoc entries
 */

import java.util.Enumeration;

import persistence.antlr.collections.impl.BitSet;
import persistence.antlr.collections.impl.Vector;

import java.io.PrintWriter; //SAS: changed for proper text file io
import java.io.IOException;
import java.io.FileWriter;

/**Generate P.sgml, a cross-linked representation of P with or without actions */
public class DocBookCodeGenerator extends CodeGenerator {
    /** non-zero if inside syntactic predicate generation */
    protected int syntacticPredLevel = 0;

    /** true during lexer generation, false during parser generation */
    protected boolean doingLexRules = false;

    protected boolean firstElementInAlt;

    protected AlternativeElement prevAltElem = null;	// what was generated last?

    /** Create a Diagnostic code-generator using the given Grammar
     * The caller must still call setTool, setBehavior, and setAnalyzer
     * before generating code.
     */
    public DocBookCodeGenerator() {
        super();
        charFormatter = new JavaCharFormatter();
    }

    /** Encode a string for printing in a HTML document..
     * e.g. encode '<' '>' and similar stuff
     * @param s the string to encode
     */
    static String HTMLEncode(String s) {
        StringBuffer buf = new StringBuffer();

        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c == '&')
                buf.append("&");
            else if (c == '\"')
                buf.append(""");
            else if (c == '\'')
                buf.append("'");
            else if (c == '<')
                buf.append("<");
            else if (c == '>')
                buf.append(">");
            else
                buf.append(c);
        }
        return buf.toString();
    }

    /** Encode a string for printing in a HTML document..
     * e.g. encode '<' '>' and similar stuff
     * @param s the string to encode
     */
    static String QuoteForId(String s) {
        StringBuffer buf = new StringBuffer();

        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c == '_')
                buf.append(".");
            else
                buf.append(c);
        }
        return buf.toString();
    }

    public void gen() {
        // Do the code generation
        try {
            // Loop over all grammars
            Enumeration grammarIter = behavior.grammars.elements();
            while (grammarIter.hasMoreElements()) {
                Grammar g = (Grammar)grammarIter.nextElement();

                // Connect all the components to each other
                /*
				g.setGrammarAnalyzer(analyzer);
				analyzer.setGrammar(g);
				*/
                g.setCodeGenerator(this);

                // To get right overloading behavior across hetrogeneous grammars
                g.generate();

                if (antlrTool.hasError()) {
                    antlrTool.fatalError("Exiting due to errors.");
                }

            }

        }
        catch (IOException e) {
            antlrTool.reportException(e, null);
        }
    }

    /** Generate code for the given grammar element.
     * @param blk The {...} action to generate
     */
    public void gen(ActionElement action) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param blk The "x|y|z|..." block to generate
     */
    public void gen(AlternativeBlock blk) {
        genGenericBlock(blk, "");
    }

    /** Generate code for the given grammar element.
     * @param blk The block-end element to generate.  Block-end
     * elements are synthesized by the grammar parser to represent
     * the end of a block.
     */
    public void gen(BlockEndElement end) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param blk The character literal reference to generate
     */
    public void gen(CharLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLEncode(atom.atomText) + " ");
    }

    /** Generate code for the given grammar element.
     * @param blk The character-range reference to generate
     */
    public void gen(CharRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }

    /** Generate the lexer HTML file */
    public void gen(LexerGrammar g) throws IOException {
        setGrammar(g);
        antlrTool.reportProgress("Generating " + grammar.getClassName() + TokenTypesFileExt);
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + TokenTypesFileExt);
        //SAS: changed for proper text file io

        tabs = 0;
        doingLexRules = true;

        // Generate header common to all TXT output files
        genHeader();

        // Output the user-defined lexer premamble
        // RK: guess not..
        // println(grammar.preambleAction.getText());

        // Generate lexer class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of lexer " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined parser class members
        // printAction(grammar.classMemberAction.getText());

        /*
		// Generate string literals
		println("");
		println("*** String literals used in the parser");
		println("The following string literals were used in the parser.");
		println("An actual code generator would arrange to place these literals");
		println("into a table in the generated lexer, so that actions in the");
		println("generated lexer could match token text against the literals.");
		println("String literals used in the lexer are not listed here, as they");
		println("are incorporated into the mainstream lexer processing.");
		tabs++;
		// Enumerate all of the symbols and look for string literal symbols
		Enumeration ids = grammar.getSymbols();
		while ( ids.hasMoreElements() ) {
			GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
			// Only processing string literals -- reject other symbol entries
			if ( sym instanceof StringLiteralSymbol ) {
				StringLiteralSymbol s = (StringLiteralSymbol)sym;
				println(s.getId() + " = " + s.getTokenType());
			}
		}
		tabs--;
		println("*** End of string literals used by the parser");
		*/

        // Generate nextToken() rule.
        // nextToken() is a synthetic lexer rule that is the implicit OR of all
        // user-defined lexer rules.
        genNextToken();

        // Generate code for each rule in the lexer

        Enumeration ids = grammar.rules.elements();
        while (ids.hasMoreElements()) {
            RuleSymbol rs = (RuleSymbol)ids.nextElement();
            if (!rs.id.equals("mnextToken")) {
                genRule(rs);
            }
        }

        // Close the lexer output file
        currentOutput.close();
        currentOutput = null;
        doingLexRules = false;
    }

    /** Generate code for the given grammar element.
     * @param blk The (...)+ block to generate
     */
    public void gen(OneOrMoreBlock blk) {
        genGenericBlock(blk, "+");
    }

    /** Generate the parser HTML file */
    public void gen(ParserGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the parser and set the currentOutput
        antlrTool.reportProgress("Generating " + grammar.getClassName() + ".sgml");
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".sgml");

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Generate parser class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Enumerate the parser rules
        Enumeration rules = grammar.rules.elements();
        while (rules.hasMoreElements()) {
            println("");
            // Get the rules from the list and downcast it to proper type
            GrammarSymbol sym = (GrammarSymbol)rules.nextElement();
            // Only process parser rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");

        genTail();

        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }

    /** Generate code for the given grammar element.
     * @param blk The rule-reference to generate
     */
    public void gen(RuleRefElement rr) {
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);

        // Generate the actual rule description
        _print("");
        _print(rr.targetRule);
        _print("");
        // RK: Leave out args..
        //	if (rr.args != null) {
        //		_print("["+rr.args+"]");
        //	}
        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param blk The string-literal reference to generate
     */
    public void gen(StringLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLEncode(atom.atomText));
        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param blk The token-range reference to generate
     */
    public void gen(TokenRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }

    /** Generate code for the given grammar element.
     * @param blk The token-reference to generate
     */
    public void gen(TokenRefElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(atom.atomText);
        _print(" ");
    }

    public void gen(TreeElement t) {
        print(t + " ");
    }

    /** Generate the tree-walker TXT file */
    public void gen(TreeWalkerGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the parser and set the currentOutput
        antlrTool.reportProgress("Generating " + grammar.getClassName() + ".sgml");
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".sgml");
        //SAS: changed for proper text file io

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Output the user-defined parser premamble
        println("");
//		println("*** Tree-walker Preamble Action.");
//		println("This action will appear before the declaration of your tree-walker class:");
//		tabs++;
//		println(grammar.preambleAction.getText());
//		tabs--;
//		println("*** End of tree-walker Preamble Action");

        // Generate tree-walker class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of tree parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined tree-walker class members
//		println("");
//		println("*** User-defined tree-walker class members:");
//		println("These are the member declarations that you defined for your class:");
//		tabs++;
//		printAction(grammar.classMemberAction.getText());
//		tabs--;
//		println("*** End of user-defined tree-walker class members");

        // Generate code for each rule in the grammar
        println("");
//		println("*** tree-walker rules:");
        tabs++;

        // Enumerate the tree-walker rules
        Enumeration rules = grammar.rules.elements();
        while (rules.hasMoreElements()) {
            println("");
            // Get the rules from the list and downcast it to proper type
            GrammarSymbol sym = (GrammarSymbol)rules.nextElement();
            // Only process tree-walker rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");
//		println("*** End of tree-walker rules");

//		println("");
//		println("*** End of tree-walker");

        // Close the tree-walker output stream
        currentOutput.close();
        currentOutput = null;
    }

    /** Generate a wildcard element */
    public void gen(WildcardElement wc) {
        /*
		if ( wc.getLabel()!=null ) {
			_print(wc.getLabel()+"=");
		}
		*/
        _print(". ");
    }

    /** Generate code for the given grammar element.
     * @param blk The (...)* block to generate
     */
    public void gen(ZeroOrMoreBlock blk) {
        genGenericBlock(blk, "*");
    }

    protected void genAlt(Alternative alt) {
        if (alt.getTreeSpecifier() != null) {
            _print(alt.getTreeSpecifier().getText());
        }
        prevAltElem = null;
        for (AlternativeElement elem = alt.head;
             !(elem instanceof BlockEndElement);
             elem = elem.next) {
            elem.generate();
            firstElementInAlt = false;
            prevAltElem = elem;
        }
    }
    /** Generate the header for a block, which may be a RuleBlock or a
     * plain AlternativeBLock.  This generates any variable declarations,
     * init-actions, and syntactic-predicate-testing variables.
     * @blk The block for which the preamble is to be generated.
     */
//	protected void genBlockPreamble(AlternativeBlock blk) {
    // RK: don't dump out init actions
    // dump out init action
//		if ( blk.initAction!=null ) {
//			printAction("{" + blk.initAction + "}");
//		}
//	}
    /** Generate common code for a block of alternatives; return a postscript
     * that needs to be generated at the end of the block.  Other routines
     * may append else-clauses and such for error checking before the postfix
     * is generated.
     */
    public void genCommonBlock(AlternativeBlock blk) {
        if (blk.alternatives.size() > 1)
            println("");
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            AlternativeElement elem = alt.head;

            if (blk.alternatives.size() > 1)
                print("");

            // dump alt operator |
            if (i > 0 && blk.alternatives.size() > 1) {
                _print("| ");
            }

            // Dump the alternative, starting with predicates
            //
            boolean save = firstElementInAlt;
            firstElementInAlt = true;
            tabs++;	// in case we do a newline in alt, increase the tab indent

            genAlt(alt);
            tabs--;
            firstElementInAlt = save;
            if (blk.alternatives.size() > 1)
                _println("");
        }
        if (blk.alternatives.size() > 1)
            println("");
    }

    /** Generate a textual representation of the follow set
     * for a block.
     * @param blk  The rule block of interest
     */
    public void genFollowSetForRuleBlock(RuleBlock blk) {
        Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, blk.endNode);
        printSet(grammar.maxk, 1, follow);
    }

    protected void genGenericBlock(AlternativeBlock blk, String blkOp) {
        if (blk.alternatives.size() > 1) {
            // make sure we start on a new line
            _println("");
            if (!firstElementInAlt) {
                // only do newline if the last element wasn't a multi-line block
                //if ( prevAltElem==null ||
                //	 !(prevAltElem instanceof AlternativeBlock) ||
                //	 ((AlternativeBlock)prevAltElem).alternatives.size()==1 )
                //{
                _println("(");
                //}
                //else
                //{
                //	_print("(");
                //}
                // _println("");
                // print("(\t");
            }
            else {
                _print("(");
            }
        }
        else {
            _print("( ");
        }
        // RK: don't dump init actions
        //	genBlockPreamble(blk);
        genCommonBlock(blk);
        if (blk.alternatives.size() > 1) {
            _println("");
            print(")" + blkOp + " ");
            // if not last element of alt, need newline & to indent
            if (!(blk.next instanceof BlockEndElement)) {
                _println("");
                print("");
            }
        }
        else {
            _print(")" + blkOp + " ");
        }
    }

    /** Generate a header that is common to all TXT files */
    protected void genHeader() {
        println("");
        println("");
        println("");
        println("");
        println("Grammar " + grammar.getClassName() + "");
        println("  ");
        println("    ");
        println("    ");
        println("    ");
        println("    ");
        println("     
"); println(" "); println("
"); println("
"); println("
"); println(" "); println(" "); println(" Generated by ANTLR" + antlrTool.version); println(" from " + antlrTool.grammarFile); println(" "); println(" "); println(" "); println(" "); println(" "); println(" "); println(" "); println("
"); println(""); println(""); } /**Generate the lookahead set for an alternate. */ protected void genLookaheadSetForAlt(Alternative alt) { if (doingLexRules && alt.cache[1].containsEpsilon()) { println("MATCHES ALL"); return; } int depth = alt.lookaheadDepth; if (depth == GrammarAnalyzer.NONDETERMINISTIC) { // if the decision is nondeterministic, do the best we can: LL(k) // any predicates that are around will be generated later. depth = grammar.maxk; } for (int i = 1; i <= depth; i++) { Lookahead lookahead = alt.cache[i]; printSet(depth, i, lookahead); } } /** Generate a textual representation of the lookahead set * for a block. * @param blk The block of interest */ public void genLookaheadSetForBlock(AlternativeBlock blk) { // Find the maximal lookahead depth over all alternatives int depth = 0; for (int i = 0; i < blk.alternatives.size(); i++) { Alternative alt = blk.getAlternativeAt(i); if (alt.lookaheadDepth == GrammarAnalyzer.NONDETERMINISTIC) { depth = grammar.maxk; break; } else if (depth < alt.lookaheadDepth) { depth = alt.lookaheadDepth; } } for (int i = 1; i <= depth; i++) { Lookahead lookahead = grammar.theLLkAnalyzer.look(i, blk); printSet(depth, i, lookahead); } } /** Generate the nextToken rule. * nextToken is a synthetic lexer rule that is the implicit OR of all * user-defined lexer rules. */ public void genNextToken() { println(""); println("/** Lexer nextToken rule:"); println(" * The lexer nextToken rule is synthesized from all of the user-defined"); println(" * lexer rules. It logically consists of one big alternative block with"); println(" * each user-defined rule being an alternative."); println(" */"); // Create the synthesized rule block for nextToken consisting // of an alternate block containing all the user-defined lexer rules. RuleBlock blk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken"); // Define the nextToken rule symbol RuleSymbol nextTokenRs = new RuleSymbol("mnextToken"); nextTokenRs.setDefined(); nextTokenRs.setBlock(blk); nextTokenRs.access = "private"; grammar.define(nextTokenRs); /* // Analyze the synthesized block if (!grammar.theLLkAnalyzer.deterministic(blk)) { println("The grammar analyzer has determined that the synthesized"); println("nextToken rule is non-deterministic (i.e., it has ambiguities)"); println("This means that there is some overlap of the character"); println("lookahead for two or more of your lexer rules."); } */ genCommonBlock(blk); } /** Generate code for a named rule block * @param s The RuleSymbol describing the rule to generate */ public void genRule(RuleSymbol s) { if (s == null || !s.isDefined()) return; // undefined rule println(""); if (s.access.length() != 0) { if (!s.access.equals("public")) { _print("" + s.access + " "); } } println("
"); println("" + s.getId() + ""); if (s.comment != null) { _println("" + HTMLEncode(s.comment) + ""); } println(""); // Get rule return type and arguments RuleBlock rblk = s.getBlock(); // RK: for HTML output not of much value... // Gen method return value(s) // if (rblk.returnAction != null) { // _print("["+rblk.returnAction+"]"); // } // Gen arguments // if (rblk.argAction != null) // { // _print(" returns [" + rblk.argAction+"]"); // } _println(""); print(s.getId() + ":\t"); tabs++; // Dump any init-action // genBlockPreamble(rblk); // Dump the alternates of the rule genCommonBlock(rblk); _println(""); // println(";"); tabs--; _println(""); _println("
"); } /** Generate the syntactic predicate. This basically generates * the alternative block, buts tracks if we are inside a synPred * @param blk The syntactic predicate block */ protected void genSynPred(SynPredBlock blk) { // no op } public void genTail() { println("
"); println("
"); } /** Generate the token types TXT file */ protected void genTokenTypes(TokenManager tm) throws IOException { // Open the token output TXT file and set the currentOutput stream antlrTool.reportProgress("Generating " + tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt); currentOutput = antlrTool.openOutputFile(tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt); //SAS: changed for proper text file io tabs = 0; // Generate the header common to all diagnostic files genHeader(); // Generate a string for each token. This creates a static // array of Strings indexed by token type. println(""); println("*** Tokens used by the parser"); println("This is a list of the token numeric values and the corresponding"); println("token identifiers. Some tokens are literals, and because of that"); println("they have no identifiers. Literals are double-quoted."); tabs++; // Enumerate all the valid token types Vector v = tm.getVocabulary(); for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) { String s = (String)v.elementAt(i); if (s != null) { println(s + " = " + i); } } // Close the interface tabs--; println("*** End of tokens used by the parser"); // Close the tokens output file currentOutput.close(); currentOutput = null; } /// unused. protected String processActionForSpecialSymbols(String actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo) { return actionStr; } /** Get a string for an expression to generate creation of an AST subtree. * @param v A Vector of String, where each element is an expression in the target language yielding an AST node. */ public String getASTCreateString(Vector v) { return null; } /** Get a string for an expression to generate creating of an AST node * @param str The arguments to the AST constructor */ public String getASTCreateString(GrammarAtom atom, String str) { return null; } /** Map an identifier to it's corresponding tree-node variable. * This is context-sensitive, depending on the rule and alternative * being generated * @param id The identifier name to map * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned. */ public String mapTreeId(String id, ActionTransInfo tInfo) { return id; } /** Format a lookahead or follow set. * @param depth The depth of the entire lookahead/follow * @param k The lookahead level to print * @param lookahead The lookahead/follow set to print */ public void printSet(int depth, int k, Lookahead lookahead) { int numCols = 5; int[] elems = lookahead.fset.toArray(); if (depth != 1) { print("k==" + k + ": {"); } else { print("{ "); } if (elems.length > numCols) { _println(""); tabs++; print(""); } int column = 0; for (int i = 0; i < elems.length; i++) { column++; if (column > numCols) { _println(""); print(""); column = 0; } if (doingLexRules) { _print(charFormatter.literalChar(elems[i])); } else { _print((String)grammar.tokenManager.getVocabulary().elementAt(elems[i])); } if (i != elems.length - 1) { _print(", "); } } if (elems.length > numCols) { _println(""); tabs--; print(""); } _println(" }"); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy