persistence.antlr.DocBookCodeGenerator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of toplink-essentials
The newest version!
package persistence.antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 */

/** TODO: strip comments from javadoc entries
 */

import java.util.Enumeration;

import persistence.antlr.collections.impl.BitSet;
import persistence.antlr.collections.impl.Vector;

import java.io.PrintWriter; //SAS: changed for proper text file io
import java.io.IOException;
import java.io.FileWriter;

/**Generate P.sgml, a cross-linked representation of P with or without actions */
public class DocBookCodeGenerator extends CodeGenerator {
    /** non-zero if inside syntactic predicate generation */
    protected int syntacticPredLevel = 0;

    /** true during lexer generation, false during parser generation */
    protected boolean doingLexRules = false;

    protected boolean firstElementInAlt;

    protected AlternativeElement prevAltElem = null;	// what was generated last?

    /** Create a Diagnostic code-generator using the given Grammar
     * The caller must still call setTool, setBehavior, and setAnalyzer
     * before generating code.
     */
    public DocBookCodeGenerator() {
        super();
        charFormatter = new JavaCharFormatter();
    }

    /** Encode a string for printing in a HTML document..
     * e.g. encode '<' '>' and similar stuff
     * @param s the string to encode
     */
    static String HTMLEncode(String s) {
        StringBuffer buf = new StringBuffer();

        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c == '&')
                buf.append("&");
            else if (c == '\"')
                buf.append(""");
            else if (c == '\'')
                buf.append("'");
            else if (c == '<')
                buf.append("<");
            else if (c == '>')
                buf.append(">");
            else
                buf.append(c);
        }
        return buf.toString();
    }

    /** Encode a string for printing in a HTML document..
     * e.g. encode '<' '>' and similar stuff
     * @param s the string to encode
     */
    static String QuoteForId(String s) {
        StringBuffer buf = new StringBuffer();

        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c == '_')
                buf.append(".");
            else
                buf.append(c);
        }
        return buf.toString();
    }

    public void gen() {
        // Do the code generation
        try {
            // Loop over all grammars
            Enumeration grammarIter = behavior.grammars.elements();
            while (grammarIter.hasMoreElements()) {
                Grammar g = (Grammar)grammarIter.nextElement();

                // Connect all the components to each other
                /*
				g.setGrammarAnalyzer(analyzer);
				analyzer.setGrammar(g);
				*/
                g.setCodeGenerator(this);

                // To get right overloading behavior across hetrogeneous grammars
                g.generate();

                if (antlrTool.hasError()) {
                    antlrTool.fatalError("Exiting due to errors.");
                }

            }

        }
        catch (IOException e) {
            antlrTool.reportException(e, null);
        }
    }

    /** Generate code for the given grammar element.
     * @param blk The {...} action to generate
     */
    public void gen(ActionElement action) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param blk The "x|y|z|..." block to generate
     */
    public void gen(AlternativeBlock blk) {
        genGenericBlock(blk, "");
    }

    /** Generate code for the given grammar element.
     * @param blk The block-end element to generate.  Block-end
     * elements are synthesized by the grammar parser to represent
     * the end of a block.
     */
    public void gen(BlockEndElement end) {
        // no-op
    }

    /** Generate code for the given grammar element.
     * @param blk The character literal reference to generate
     */
    public void gen(CharLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLEncode(atom.atomText) + " ");
    }

    /** Generate code for the given grammar element.
     * @param blk The character-range reference to generate
     */
    public void gen(CharRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }

    /** Generate the lexer HTML file */
    public void gen(LexerGrammar g) throws IOException {
        setGrammar(g);
        antlrTool.reportProgress("Generating " + grammar.getClassName() + TokenTypesFileExt);
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + TokenTypesFileExt);
        //SAS: changed for proper text file io

        tabs = 0;
        doingLexRules = true;

        // Generate header common to all TXT output files
        genHeader();

        // Output the user-defined lexer premamble
        // RK: guess not..
        // println(grammar.preambleAction.getText());

        // Generate lexer class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of lexer " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined parser class members
        // printAction(grammar.classMemberAction.getText());

        /*
		// Generate string literals
		println("");
		println("*** String literals used in the parser");
		println("The following string literals were used in the parser.");
		println("An actual code generator would arrange to place these literals");
		println("into a table in the generated lexer, so that actions in the");
		println("generated lexer could match token text against the literals.");
		println("String literals used in the lexer are not listed here, as they");
		println("are incorporated into the mainstream lexer processing.");
		tabs++;
		// Enumerate all of the symbols and look for string literal symbols
		Enumeration ids = grammar.getSymbols();
		while ( ids.hasMoreElements() ) {
			GrammarSymbol sym = (GrammarSymbol)ids.nextElement();
			// Only processing string literals -- reject other symbol entries
			if ( sym instanceof StringLiteralSymbol ) {
				StringLiteralSymbol s = (StringLiteralSymbol)sym;
				println(s.getId() + " = " + s.getTokenType());
			}
		}
		tabs--;
		println("*** End of string literals used by the parser");
		*/

        // Generate nextToken() rule.
        // nextToken() is a synthetic lexer rule that is the implicit OR of all
        // user-defined lexer rules.
        genNextToken();

        // Generate code for each rule in the lexer

        Enumeration ids = grammar.rules.elements();
        while (ids.hasMoreElements()) {
            RuleSymbol rs = (RuleSymbol)ids.nextElement();
            if (!rs.id.equals("mnextToken")) {
                genRule(rs);
            }
        }

        // Close the lexer output file
        currentOutput.close();
        currentOutput = null;
        doingLexRules = false;
    }

    /** Generate code for the given grammar element.
     * @param blk The (...)+ block to generate
     */
    public void gen(OneOrMoreBlock blk) {
        genGenericBlock(blk, "+");
    }

    /** Generate the parser HTML file */
    public void gen(ParserGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the parser and set the currentOutput
        antlrTool.reportProgress("Generating " + grammar.getClassName() + ".sgml");
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".sgml");

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Generate parser class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Enumerate the parser rules
        Enumeration rules = grammar.rules.elements();
        while (rules.hasMoreElements()) {
            println("");
            // Get the rules from the list and downcast it to proper type
            GrammarSymbol sym = (GrammarSymbol)rules.nextElement();
            // Only process parser rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");

        genTail();

        // Close the parser output stream
        currentOutput.close();
        currentOutput = null;
    }

    /** Generate code for the given grammar element.
     * @param blk The rule-reference to generate
     */
    public void gen(RuleRefElement rr) {
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);

        // Generate the actual rule description
        _print("");
        _print(rr.targetRule);
        _print("");
        // RK: Leave out args..
        //	if (rr.args != null) {
        //		_print("["+rr.args+"]");
        //	}
        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param blk The string-literal reference to generate
     */
    public void gen(StringLiteralElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(HTMLEncode(atom.atomText));
        _print(" ");
    }

    /** Generate code for the given grammar element.
     * @param blk The token-range reference to generate
     */
    public void gen(TokenRangeElement r) {
        print(r.beginText + ".." + r.endText + " ");
    }

    /** Generate code for the given grammar element.
     * @param blk The token-reference to generate
     */
    public void gen(TokenRefElement atom) {
        if (atom.not) {
            _print("~");
        }
        _print(atom.atomText);
        _print(" ");
    }

    public void gen(TreeElement t) {
        print(t + " ");
    }

    /** Generate the tree-walker TXT file */
    public void gen(TreeWalkerGrammar g) throws IOException {
        setGrammar(g);
        // Open the output stream for the parser and set the currentOutput
        antlrTool.reportProgress("Generating " + grammar.getClassName() + ".sgml");
        currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".sgml");
        //SAS: changed for proper text file io

        tabs = 0;

        // Generate the header common to all output files.
        genHeader();

        // Output the user-defined parser premamble
        println("");
//		println("*** Tree-walker Preamble Action.");
//		println("This action will appear before the declaration of your tree-walker class:");
//		tabs++;
//		println(grammar.preambleAction.getText());
//		tabs--;
//		println("*** End of tree-walker Preamble Action");

        // Generate tree-walker class definition
        println("");

        // print javadoc comment if any
        if (grammar.comment != null) {
            _println(HTMLEncode(grammar.comment));
        }

        println("Definition of tree parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + ".");

        // Generate user-defined tree-walker class members
//		println("");
//		println("*** User-defined tree-walker class members:");
//		println("These are the member declarations that you defined for your class:");
//		tabs++;
//		printAction(grammar.classMemberAction.getText());
//		tabs--;
//		println("*** End of user-defined tree-walker class members");

        // Generate code for each rule in the grammar
        println("");
//		println("*** tree-walker rules:");
        tabs++;

        // Enumerate the tree-walker rules
        Enumeration rules = grammar.rules.elements();
        while (rules.hasMoreElements()) {
            println("");
            // Get the rules from the list and downcast it to proper type
            GrammarSymbol sym = (GrammarSymbol)rules.nextElement();
            // Only process tree-walker rules
            if (sym instanceof RuleSymbol) {
                genRule((RuleSymbol)sym);
            }
        }
        tabs--;
        println("");
//		println("*** End of tree-walker rules");

//		println("");
//		println("*** End of tree-walker");

        // Close the tree-walker output stream
        currentOutput.close();
        currentOutput = null;
    }

    /** Generate a wildcard element */
    public void gen(WildcardElement wc) {
        /*
		if ( wc.getLabel()!=null ) {
			_print(wc.getLabel()+"=");
		}
		*/
        _print(". ");
    }

    /** Generate code for the given grammar element.
     * @param blk The (...)* block to generate
     */
    public void gen(ZeroOrMoreBlock blk) {
        genGenericBlock(blk, "*");
    }

    protected void genAlt(Alternative alt) {
        if (alt.getTreeSpecifier() != null) {
            _print(alt.getTreeSpecifier().getText());
        }
        prevAltElem = null;
        for (AlternativeElement elem = alt.head;
             !(elem instanceof BlockEndElement);
             elem = elem.next) {
            elem.generate();
            firstElementInAlt = false;
            prevAltElem = elem;
        }
    }
    /** Generate the header for a block, which may be a RuleBlock or a
     * plain AlternativeBLock.  This generates any variable declarations,
     * init-actions, and syntactic-predicate-testing variables.
     * @blk The block for which the preamble is to be generated.
     */
//	protected void genBlockPreamble(AlternativeBlock blk) {
    // RK: don't dump out init actions
    // dump out init action
//		if ( blk.initAction!=null ) {
//			printAction("{" + blk.initAction + "}");
//		}
//	}
    /** Generate common code for a block of alternatives; return a postscript
     * that needs to be generated at the end of the block.  Other routines
     * may append else-clauses and such for error checking before the postfix
     * is generated.
     */
    public void genCommonBlock(AlternativeBlock blk) {
        if (blk.alternatives.size() > 1)
            println("");
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            AlternativeElement elem = alt.head;

            if (blk.alternatives.size() > 1)
                print("");

            // dump alt operator |
            if (i > 0 && blk.alternatives.size() > 1) {
                _print("| ");
            }

            // Dump the alternative, starting with predicates
            //
            boolean save = firstElementInAlt;
            firstElementInAlt = true;
            tabs++;	// in case we do a newline in alt, increase the tab indent

            genAlt(alt);
            tabs--;
            firstElementInAlt = save;
            if (blk.alternatives.size() > 1)
                _println("");
        }
        if (blk.alternatives.size() > 1)
            println("");
    }

    /** Generate a textual representation of the follow set
     * for a block.
     * @param blk  The rule block of interest
     */
    public void genFollowSetForRuleBlock(RuleBlock blk) {
        Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, blk.endNode);
        printSet(grammar.maxk, 1, follow);
    }

    protected void genGenericBlock(AlternativeBlock blk, String blkOp) {
        if (blk.alternatives.size() > 1) {
            // make sure we start on a new line
            _println("");
            if (!firstElementInAlt) {
                // only do newline if the last element wasn't a multi-line block
                //if ( prevAltElem==null ||
                //	 !(prevAltElem instanceof AlternativeBlock) ||
                //	 ((AlternativeBlock)prevAltElem).alternatives.size()==1 )
                //{
                _println("(");
                //}
                //else
                //{
                //	_print("(");
                //}
                // _println("");
                // print("(\t");
            }
            else {
                _print("(");
            }
        }
        else {
            _print("( ");
        }
        // RK: don't dump init actions
        //	genBlockPreamble(blk);
        genCommonBlock(blk);
        if (blk.alternatives.size() > 1) {
            _println("");
            print(")" + blkOp + " ");
            // if not last element of alt, need newline & to indent
            if (!(blk.next instanceof BlockEndElement)) {
                _println("");
                print("");
            }
        }
        else {
            _print(")" + blkOp + " ");
        }
    }

    /** Generate a header that is common to all TXT files */
    protected void genHeader() {
        println("");
        println("");
        println("");
        println("");
        println("Grammar " + grammar.getClassName() + "");
        println("  ");
        println("    ");
        println("    ");
        println("    ");
        println("    ");
        println("     ");
        println("     ");
        println("     ");
        println("    ");
        println("  ");
        println("  ");
        println("    ");
        println("    Generated by ANTLR" + antlrTool.version);
        println("    from " + antlrTool.grammarFile);
        println("    ");
        println("  ");
        println("  ");
        println("  ");
        println("  ");
        println("  ");
        println("  ");
        println("");
        println("");
        println("");
    }

    /**Generate the lookahead set for an alternate. */
    protected void genLookaheadSetForAlt(Alternative alt) {
        if (doingLexRules && alt.cache[1].containsEpsilon()) {
            println("MATCHES ALL");
            return;
        }
        int depth = alt.lookaheadDepth;
        if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
            // if the decision is nondeterministic, do the best we can: LL(k)
            // any predicates that are around will be generated later.
            depth = grammar.maxk;
        }
        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = alt.cache[i];
            printSet(depth, i, lookahead);
        }
    }

    /** Generate a textual representation of the lookahead set
     * for a block.
     * @param blk  The block of interest
     */
    public void genLookaheadSetForBlock(AlternativeBlock blk) {
        // Find the maximal lookahead depth over all alternatives
        int depth = 0;
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (alt.lookaheadDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                depth = grammar.maxk;
                break;
            }
            else if (depth < alt.lookaheadDepth) {
                depth = alt.lookaheadDepth;
            }
        }

        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = grammar.theLLkAnalyzer.look(i, blk);
            printSet(depth, i, lookahead);
        }
    }

    /** Generate the nextToken rule.
     * nextToken is a synthetic lexer rule that is the implicit OR of all
     * user-defined lexer rules.
     */
    public void genNextToken() {
        println("");
        println("/** Lexer nextToken rule:");
        println(" *  The lexer nextToken rule is synthesized from all of the user-defined");
        println(" *  lexer rules.  It logically consists of one big alternative block with");
        println(" *  each user-defined rule being an alternative.");
        println(" */");

        // Create the synthesized rule block for nextToken consisting
        // of an alternate block containing all the user-defined lexer rules.
        RuleBlock blk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");

        // Define the nextToken rule symbol
        RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
        nextTokenRs.setDefined();
        nextTokenRs.setBlock(blk);
        nextTokenRs.access = "private";
        grammar.define(nextTokenRs);

        /*
		// Analyze the synthesized block
		if (!grammar.theLLkAnalyzer.deterministic(blk))
		{
			println("The grammar analyzer has determined that the synthesized");
			println("nextToken rule is non-deterministic (i.e., it has ambiguities)");
			println("This means that there is some overlap of the character");
			println("lookahead for two or more of your lexer rules.");
		}
		*/

        genCommonBlock(blk);
    }

    /** Generate code for a named rule block
     * @param s The RuleSymbol describing the rule to generate
     */
    public void genRule(RuleSymbol s) {
        if (s == null || !s.isDefined()) return;	// undefined rule
        println("");

        if (s.access.length() != 0) {
            if (!s.access.equals("public")) {
                _print("" + s.access + " ");
            }
        }

        println("");
        println("" + s.getId() + "");
        if (s.comment != null) {
            _println("" + HTMLEncode(s.comment) + "");
        }
        println("");

        // Get rule return type and arguments
        RuleBlock rblk = s.getBlock();

        // RK: for HTML output not of much value...
        // Gen method return value(s)
//		if (rblk.returnAction != null) {
//			_print("["+rblk.returnAction+"]");
//		}
        // Gen arguments
//		if (rblk.argAction != null)
//		{
//				_print(" returns [" + rblk.argAction+"]");
//		}
        _println("");
        print(s.getId() + ":\t");
        tabs++;

        // Dump any init-action
        // genBlockPreamble(rblk);

        // Dump the alternates of the rule
        genCommonBlock(rblk);

        _println("");
//		println(";");
        tabs--;
        _println("");
        _println("
");
    }

    /** Generate the syntactic predicate.  This basically generates
     * the alternative block, buts tracks if we are inside a synPred
     * @param blk  The syntactic predicate block
     */
    protected void genSynPred(SynPredBlock blk) {
        // no op
    }

    public void genTail() {
        println("");
        println("");
    }

    /** Generate the token types TXT file */
    protected void genTokenTypes(TokenManager tm) throws IOException {
        // Open the token output TXT file and set the currentOutput stream
        antlrTool.reportProgress("Generating " + tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt);
        currentOutput = antlrTool.openOutputFile(tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt);
        //SAS: changed for proper text file io
        tabs = 0;

        // Generate the header common to all diagnostic files
        genHeader();

        // Generate a string for each token.  This creates a static
        // array of Strings indexed by token type.
        println("");
        println("*** Tokens used by the parser");
        println("This is a list of the token numeric values and the corresponding");
        println("token identifiers.  Some tokens are literals, and because of that");
        println("they have no identifiers.  Literals are double-quoted.");
        tabs++;

        // Enumerate all the valid token types
        Vector v = tm.getVocabulary();
        for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
            String s = (String)v.elementAt(i);
            if (s != null) {
                println(s + " = " + i);
            }
        }

        // Close the interface
        tabs--;
        println("*** End of tokens used by the parser");

        // Close the tokens output file
        currentOutput.close();
        currentOutput = null;
    }

    /// unused.
    protected String processActionForSpecialSymbols(String actionStr,
                                                    int line,
                                                    RuleBlock currentRule,
                                                    ActionTransInfo tInfo) {
        return actionStr;
    }

    /** Get a string for an expression to generate creation of an AST subtree.
     * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
     */
    public String getASTCreateString(Vector v) {
        return null;
    }

    /** Get a string for an expression to generate creating of an AST node
     * @param str The arguments to the AST constructor
     */
    public String getASTCreateString(GrammarAtom atom, String str) {
        return null;
    }

    /** Map an identifier to it's corresponding tree-node variable.
     * This is context-sensitive, depending on the rule and alternative
     * being generated
     * @param id The identifier name to map
     * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned.
     */
    public String mapTreeId(String id, ActionTransInfo tInfo) {
        return id;
    }

    /** Format a lookahead or follow set.
     * @param depth The depth of the entire lookahead/follow
     * @param k The lookahead level to print
     * @param lookahead  The lookahead/follow set to print
     */
    public void printSet(int depth, int k, Lookahead lookahead) {
        int numCols = 5;

        int[] elems = lookahead.fset.toArray();

        if (depth != 1) {
            print("k==" + k + ": {");
        }
        else {
            print("{ ");
        }
        if (elems.length > numCols) {
            _println("");
            tabs++;
            print("");
        }

        int column = 0;
        for (int i = 0; i < elems.length; i++) {
            column++;
            if (column > numCols) {
                _println("");
                print("");
                column = 0;
            }
            if (doingLexRules) {
                _print(charFormatter.literalChar(elems[i]));
            }
            else {
                _print((String)grammar.tokenManager.getVocabulary().elementAt(elems[i]));
            }
            if (i != elems.length - 1) {
                _print(", ");
            }
        }

        if (elems.length > numCols) {
            _println("");
            tabs--;
            print("");
        }
        _println(" }");
    }
}