All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.parser.pydml.Pydml.g4 Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

grammar Pydml;

@header
{
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
}

// This antlr grammar is based on Python 3.3 language reference: https://docs.python.org/3.3/reference/grammar.html

tokens { INDENT, DEDENT }

@lexer::members {
    private boolean debugIndentRules = false;

    // Indentation level stack
    private java.util.Stack indents = new java.util.Stack();

    // Extra tokens queue (see the NEWLINE rule).
    private java.util.Queue tokens = new java.util.LinkedList();

    // Number of opened braces, brackets and parenthesis.
    private int opened = 0;

    // This is only used to set the line number for dedent
    private Token lastToken = null;


    @Override
    public void emit(Token t) {
        if(debugIndentRules)
            System.out.println("Emitted token:" + t);

        super.setToken(t);
        tokens.offer(t);
    }


    @Override
    public Token nextToken() {
        if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
            if(debugIndentRules)
                System.out.println("EOF reached and expecting some DEDENTS, so emitting them");

            tokens.poll();
            this.emit(commonToken(PydmlParser.NEWLINE, "\n"));

            // Now emit as much DEDENT tokens as needed.
            while (!indents.isEmpty()) {
                if(debugIndentRules)
                    System.out.println("Emitting (inserted) DEDENTS");

                this.emit(createDedent());
                indents.pop();
            }
            // Put the EOF back on the token stream.
            this.emit(commonToken(PydmlParser.EOF, ""));
        }
        Token next = super.nextToken();
        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
            // Keep track of the last token on the default channel.
            this.lastToken = next;
        }
        Token retVal = tokens.isEmpty() ? next : tokens.poll();

        if(debugIndentRules)
            System.out.println("Returning nextToken: [" + retVal + "]<<" + tokens.isEmpty());

        return retVal;
    }

    private Token createDedent() {
        CommonToken dedent = commonToken(PydmlParser.DEDENT, "");
        dedent.setLine(this.lastToken.getLine());
        return dedent;
    }

    private CommonToken commonToken(int type, String text) {
        // Nike: Main change: This logic was screwed up and was emitting additional 3 characters, so commenting it for now.
        // int start = this.getCharIndex();
        // int stop = start + text.length();
        // return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
        return new CommonToken(type, text); // Main change
    }

    // Calculates the indentation level from the spaces:
    // "Tabs are replaced (from left to right) by one to eight spaces
    // such that the total number of characters up to and including
    // the replacement is a multiple of eight [...]"
    // https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
    static int getIndentationCount(String spaces) {
        int count = 0;
        for (char ch : spaces.toCharArray()) {
            switch (ch) {
                case '\t':
                    count += 8 - (count % 8);
                    break;
                default:
                    // A normal space char.
                    count++;
            }
        }
        return count;
    }
}


// 2. Modify this g4 by comparing it with Java:
// - https://pythonconquerstheuniverse.wordpress.com/2009/10/03/python-java-a-side-by-side-comparison/
// - http://www.cs.gordon.edu/courses/cps122/handouts-2014/From%20Python%20to%20Java%20Lecture/A%20Comparison%20of%20the%20Syntax%20of%20Python%20and%20Java.pdf
// - http://cs.joensuu.fi/~pviktor/python/slides/cheatsheet.pdf
// - http://www.interfaceware.com/manual/chameleon/scripts/quickreference.pdf

// DML Program is a list of expression
// For now, we only allow global function definitions (not nested or inside a while block)
programroot: (blocks+=statement | functionBlocks+=functionStatement)*  NEWLINE* EOF;



statement returns [ org.apache.sysml.parser.common.StatementInfo info ]
@init {
       // This actions occurs regardless of how many alternatives in this rule
       $info = new org.apache.sysml.parser.common.StatementInfo();
} :
    // ------------------------------------------
    // ImportStatement
    'source' OPEN_PAREN filePath = STRING CLOSE_PAREN  'as' namespace=ID  NEWLINE      # ImportStatement
    | 'setwd'  OPEN_PAREN pathValue = STRING CLOSE_PAREN NEWLINE                     # PathStatement
    // ------------------------------------------
    // AssignmentStatement
    | targetList=dataIdentifier '=' 'ifdef' OPEN_PAREN commandLineParam=dataIdentifier ','  source=expression CLOSE_PAREN NEWLINE   # IfdefAssignmentStatement
    // ------------------------------------------
    // Treat function call as AssignmentStatement or MultiAssignmentStatement
    // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
    // Convert FunctionCallIdentifier(paramExprs, ..) -> source
    | // TODO: Throw an informative error if user doesnot provide the optional assignment
    ( targetList=dataIdentifier '=' )? name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE  # FunctionCallAssignmentStatement
    | OPEN_BRACK targetList+=dataIdentifier (',' targetList+=dataIdentifier)* CLOSE_BRACK '=' name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN  NEWLINE  # FunctionCallMultiAssignmentStatement
    // {notifyErrorListeners("Too many parentheses");}
    // We don't support block statement
    // | '{' body+=expression ';'* ( body+=expression ';'* )*  '}' # BlockStatement
    // ------------------------------------------
    | targetList=dataIdentifier '=' source=expression NEWLINE   # AssignmentStatement
    // IfStatement
    // | 'if' OPEN_PAREN predicate=expression CLOSE_PAREN (ifBody+=statement ';'* |  NEWLINE INDENT (ifBody+=statement)+  DEDENT )  ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)*  '}'))?  # IfStatement
    | 'if' (OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression) ':'  NEWLINE INDENT (ifBody+=statement)+  DEDENT (elifBranches += elifBranch)* ('else'  ':'  NEWLINE INDENT (elseBody+=statement)+  DEDENT )?  # IfStatement
    // ------------------------------------------
    // ForStatement & ParForStatement
    | 'for' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN |  iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':'  NEWLINE INDENT (body+=statement)+  DEDENT  # ForStatement
    // Convert strictParameterizedExpression to HashMap for parForParams
    | 'parfor' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+  DEDENT  # ParForStatement
    | 'while' ( OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression ) ':' NEWLINE INDENT (body+=statement)+  DEDENT  # WhileStatement
    // ------------------------------------------
    | NEWLINE #IgnoreNewLine
;

elifBranch returns [ org.apache.sysml.parser.common.StatementInfo info ]
  @init {
        // This actions occurs regardless of how many alternatives in this rule
        $info = new org.apache.sysml.parser.common.StatementInfo();
  } :
     'elif' (OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression) ':'  NEWLINE INDENT (elifBody+=statement)+  DEDENT
;

iterablePredicate returns [ org.apache.sysml.parser.common.ExpressionInfo info ]
  @init {
         // This actions occurs regardless of how many alternatives in this rule
         $info = new org.apache.sysml.parser.common.ExpressionInfo();
  } :
    from=expression ':' to=expression #IterablePredicateColonExpression
    | ID OPEN_PAREN from=expression ',' to=expression (',' increment=expression)? CLOSE_PAREN #IterablePredicateSeqExpression
    ;

functionStatement returns [ org.apache.sysml.parser.common.StatementInfo info ]
@init {
       // This actions occurs regardless of how many alternatives in this rule
       $info = new org.apache.sysml.parser.common.StatementInfo();
} :
    // ------------------------------------------
    // FunctionStatement & ExternalFunctionStatement
    // small change: only allow typed arguments here ... instead of data identifier
    'def' name=ID  OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN  ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? ':' NEWLINE INDENT (body+=statement)+ DEDENT # InternalFunctionDefExpression
    | 'defExternal' name=ID  OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN  ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )?   'implemented' 'in' OPEN_PAREN ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? CLOSE_PAREN NEWLINE    # ExternalFunctionDefExpression
    // ------------------------------------------
;


// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
dataIdentifier returns [ org.apache.sysml.parser.common.ExpressionInfo dataInfo ]
@init {
       // This actions occurs regardless of how many alternatives in this rule
       $dataInfo = new org.apache.sysml.parser.common.ExpressionInfo();
       // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
} :
    // ------------------------------------------
    // IndexedIdentifier -- allows implicit lower and upper bounds
    name=ID OPEN_BRACK (rowLower=expression)? (rowImplicitSlice=':' (rowUpper=expression)?)?  (',' (colLower=expression)? (colImplicitSlice=':' (colUpper=expression)?)?)? CLOSE_BRACK # IndexedExpression
    // ------------------------------------------
    | ID                                            # SimpleDataIdentifierExpression
    | COMMANDLINE_NAMED_ID                          # CommandlineParamExpression
    | COMMANDLINE_POSITION_ID                       # CommandlinePositionExpression
;
expression returns [ org.apache.sysml.parser.common.ExpressionInfo info ]
@init {
       // This actions occurs regardless of how many alternatives in this rule
       $info = new org.apache.sysml.parser.common.ExpressionInfo();
       // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
} :
    // ------------------------------------------
    // BinaryExpression
    // power
     left=expression op='**' right=expression  # PowerExpression
    // unary plus and minus
    | op=('-'|'+') left=expression                        # UnaryExpression
    // sequence - since we are only using this into for loop => Array not supported
    //| left=expression op=':' right=expression             # SequenceExpression
    // matrix multiply
    // | left=expression op='*' right=expression           # MatrixMulExpression
    // modulus and integer division
    | left=expression op=('//' | '%' ) right=expression # ModIntDivExpression
    // arithmetic multiply and divide
    | left=expression op=('*'|'/') right=expression       # MultDivExpression
    // arithmetic addition and subtraction
    | left=expression op=('+'|'-') right=expression       # AddSubExpression
    // ------------------------------------------
    // RelationalExpression
    | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
    // ------------------------------------------
    // BooleanExpression
    // boolean not
    | op='!' left=expression # BooleanNotExpression
    // boolean and
    | left=expression op=('&'|'and') right=expression # BooleanAndExpression
    // boolean or
    | left=expression op=('|'|'or') right=expression # BooleanOrExpression

    // ---------------------------------
    // only applicable for builtin function expressions
    // Add following additional functions and check number of parameters:
    // power, full, matrix, reshape, dot
    // Also take care whether there is y.transpose() => which sometinamespace
    | name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN ';'*  # BuiltinFunctionExpression

    // 4. Atomic
    | OPEN_PAREN left=expression CLOSE_PAREN                       # AtomicExpression

    // Should you allow indexed expression here ?
    // | OPEN_BRACK targetList+=expression (',' targetList+=expression)* CLOSE_BRACK  # MultiIdExpression

    // | BOOLEAN                                       # ConstBooleanIdExpression
    | 'True'                                        # ConstTrueExpression
    | 'False'                                       # ConstFalseExpression
    | INT                                           # ConstIntIdExpression
    | DOUBLE                                        # ConstDoubleIdExpression
    | STRING                                        # ConstStringIdExpression
    | dataIdentifier                                # DataIdExpression
    // Special
    // | 'NULL' | 'NA' | 'Inf' | 'NaN'
;

typedArgNoAssign : paramName=ID ':' paramType=ml_type ;
parameterizedExpression : (paramName=ID '=')? paramVal=expression;
strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
// sometimes this is matrix object and sometimes its namespace
ID : (ALPHABET (ALPHABET|DIGIT|'_')*  '.')? ALPHABET (ALPHABET|DIGIT|'_')*
    // Special ID cases:
   // | 'matrix' // --> This is a special case which causes lot of headache
   // | 'scalar' |  'float' | 'int' | 'bool' // corresponds to as.scalar, as.double, as.integer and as.logical
   | 'index.return'
;
// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
// Therefore, ugly work around for checking datatype
ml_type :  valueType | dataType OPEN_BRACK valueType CLOSE_BRACK;
// Note to reduce number of keywords, these are case-sensitive,
// To allow case-insenstive,  'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
valueType:
    ID # ValueDataTypeCheck
   //  'int' | 'str' | 'bool' | 'float'
;
dataType:
        // 'scalar' # ScalarDataTypeDummyCheck
        // |
        ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
        //|  'matrix' //---> See ID, this causes lot of headache
        ;
INT : DIGIT+  [Ll]?;
// BOOLEAN : 'TRUE' | 'FALSE';
DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
| DIGIT+ EXP? [Ll]?
| '.' DIGIT+ EXP? [Ll]?
;
DIGIT: '0'..'9';
ALPHABET : [a-zA-Z] ;
fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
COMMANDLINE_POSITION_ID: '$' DIGIT+;

// supports single and double quoted string with escape characters
STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
fragment ESC : '\\' [btnfr"'\\] ;
// Comments, whitespaces and new line
// LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
// MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
// WHITESPACE : (' ' | '\r' | '\n')+ -> skip ;

OPEN_BRACK : '[' {opened++;};
CLOSE_BRACK : ']' {opened--;};
OPEN_PAREN : '(' {opened++;};
CLOSE_PAREN : ')' {opened--;};
// OPEN_BRACE : '{' {opened++;};
// CLOSE_BRACE : '}' {opened--;};

fragment SPACES : [ \t]+ ;
fragment COMMENT : '#' ~[\r\n]* ;
fragment LINE_JOINING : '\\' SPACES? ( '\r'? '\n' | '\r' ) ;

NEWLINE : ( '\r'? '\n' | '\r' ) SPACES?
{
    String newLine = getText().replaceAll("[^\r\n]+", "");
    String spaces = getText().replaceAll("[\r\n]+", "");
    int next = _input.LA(1);
    if (opened > 0 || next == '\r' || next == '\n' || next == '#') {
        // If we're inside a list or on a blank line, ignore all indents,
        // dedents and line breaks.
        skip();
        if(debugIndentRules) {
            if(next == '\r' || next == '\n') {
                    System.out.println("4.1 Skipping (blank lines)");
            }
            else if(next == '#') {
                System.out.println("4.2 Skipping (comment)");
            }
            else {
                System.out.println("4.2 Skipping something else");
            }
        }
    }
    else {
        emit(commonToken(NEWLINE, newLine));

        int indent = getIndentationCount(spaces);
        int previous = indents.isEmpty() ? 0 : indents.peek();
        if (indent == previous) {
            if(debugIndentRules)
                System.out.println("3. Skipping identation as of same size:" + next);

            // skip indents of the same size as the present indent-size
            skip();
        }
        else if (indent > previous) {
            if(debugIndentRules)
                System.out.println("1. Indent:" + next);

            indents.push(indent);
            emit(commonToken(PydmlParser.INDENT, spaces));
        }
        else {
            // Possibly emit more than 1 DEDENT token.
            while(!indents.isEmpty() && indents.peek() > indent) {
                if(debugIndentRules)
                    System.out.println("2. Dedent:" + next);

                this.emit(createDedent());
                indents.pop();
            }
        }
    }
}
;

SKIP_WS : ( SPACES | COMMENT | LINE_JOINING ) -> skip ;




© 2015 - 2024 Weber Informatics LLC | Privacy Policy