All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.meteor.Meteor.g Maven / Gradle / Ivy

The newest version!
// Combined lexer/parser grammar of the Meteor query language (ANTLR v3 syntax).
grammar Meteor;

options {
    language=Java;
    // Parser rules produce trees whose nodes are typed as EvaluationExpression.
    output=AST;
    ASTLabelType=EvaluationExpression;
    // No global backtracking; the rules below use explicit (...)=> predicates where lookahead is needed.
    backtrack=false;
    //memoize=true;
    // The generated parser extends MeteorParserBase; helpers that are called but not
    // declared in this file (e.g. getVariableSafely, addScope) presumably live there.
    superClass=MeteorParserBase;
}

// Imaginary tokens (no lexer rule of their own). EXPRESSION["<name>"] is used as the
// tree root in the rewrite rules of this grammar; OPERATOR is not referenced by any
// rule visible in this file.
tokens {	
    EXPRESSION;
    OPERATOR;
}

@lexer::header { 
package eu.stratosphere.meteor; 
}

@parser::header { 
package eu.stratosphere.meteor; 

import eu.stratosphere.sopremo.operator.*;
import eu.stratosphere.sopremo.io.*;
import eu.stratosphere.sopremo.query.*;
import eu.stratosphere.sopremo.pact.*;
import eu.stratosphere.sopremo.expressions.*;
import eu.stratosphere.sopremo.function.*;
import eu.stratosphere.sopremo.type.*;
import java.math.*;
import java.util.IdentityHashMap;
}

// Catch clause used for the parser rules: a RecognitionException is rethrown
// to the caller instead of being handled inside the rule.
@rulecatch {
catch (RecognitionException e) {
  throw e;
}
}

@parser::members {
  // Descriptions of the construct currently being parsed ("a json object", "a literal", ...).
  // Rules push an entry in their init section and pop it in their after section.
  private final Stack<String> paraphrase = new Stack<String>();

  /**
   * Registers the output of a nested operator inside the object creation that is being parsed.
   * The output index equals the number of mappings collected so far; a TagMapping from that
   * output to the operator is appended, and the output is stored under the variable name in
   * registry level 1 of the variable registry.
   *
   * The only visible call site (in fieldAssignment) is commented out.
   *
   * @param VAR token whose text is used as the variable name
   * @param op  nested operator whose output is bound
   * @return always true
   */
  private boolean setInnerOutput(Token VAR, Operator op) {
    JsonStreamExpression output = new JsonStreamExpression(op.getOutput($objectCreation::mappings.size()));
    $objectCreation::mappings.add(new ObjectCreation.TagMapping(output, new JsonStreamExpression(op)));
    getVariableRegistry().getRegistry(1).put(VAR.getText(), output);
    return true;
  }

  /**
   * Looks up the given variable with getVariableSafely and converts it to an input selection
   * relative to the operator stored as result in the current operator scope.
   *
   * @param inputVar variable token to resolve
   * @return the expression returned by toInputSelection
   * @throws RecognitionException as declared; presumably raised by the lookup for unknown variables
   */
  protected EvaluationExpression getInputSelection(Token inputVar) throws RecognitionException {
    return getVariableSafely(inputVar).toInputSelection($operator::result);
  }

  /**
   * Entry point: runs the script rule over the whole input.
   *
   * @throws RecognitionException if the input cannot be parsed (rules rethrow, see rulecatch)
   */
  public void parseSinks() throws RecognitionException {
    script();
  }
}

// Start rule (called from parseSinks): one or more statements, each terminated by ';'.
// The empty rewrite (->) means the rule itself produces no tree.
script
	:	 (statement ';')+ ->;

// A single statement: an operator, a package import, a function definition,
// a Java UDF registration, an ad-hoc source, or a bare function call.
statement
	:	(operator | packageImport | functionDefinition | javaudf | adhocSource
// configuration function call: the call's tree is evaluated right away against
// MissingNode.getInstance(); the value returned by evaluate is not used
	| m=functionCall { $m.tree.evaluate(MissingNode.getInstance()); }) ->;
	
// 'using' pkg (',' pkg)* -- every listed package name is passed to
// getPackageManager().importPackage as soon as it is matched.
packageImport
  :  'using' packageName=ID { getPackageManager().importPackage($packageName.text); } 
     (',' additionalPackage=ID { getPackageManager().importPackage($additionalPackage.text); })* ->;

// name = fn(...) { ... } -- registers the parsed inline function under the given name via addFunction.
functionDefinition
  : name=ID '=' func=inlineFunction { addFunction($name.text, $func.func); } -> ;
  
// fn(p1, p2, ...) { expression }
// Returns an ExpressionFunction whose arity is the number of declared parameters.
// While the body is parsed, a constant scope is opened in which the i-th parameter
// name resolves to InputSelection(i); the scope is removed after the body.
inlineFunction returns [ExpressionFunction func]
@init {
  // Typed as Token so that getText() can be called on the elements below
  // (a raw List would hand back Object and not compile).
  List<Token> params = new ArrayList<Token>();
}
  : FN '('  
  (param=ID { params.add($param); }
  (',' param=ID { params.add($param); })*)? 
  ')' 
  { 
    addConstantScope();
    for(int index = 0; index < params.size(); index++) 
      this.getConstantRegistry().put(params.get(index).getText(), new InputSelection(index)); 
  } 
  '{' def=expression '}'
  { 
    $func = new ExpressionFunction(params.size(), $def.tree);
    removeConstantScope(); 
  } -> ; 

// name = javaudf('path') -- passes the identifier text and the string text to addFunction.
javaudf
  : name=ID '=' JAVAUDF '(' path=STRING ')'
    {
      addFunction($name.text, $path.text);
    }
    -> ;

// Parses a ternaryExpression while the expression passed as argument is stored in
// this rule's dynamic scope attribute 'context'. No rule visible in this file invokes it.
contextAwareExpression [EvaluationExpression contextExpression]
scope { EvaluationExpression context }
@init { $contextAwareExpression::context = $contextExpression; }
  : ternaryExpression;

// General expression. An ID directly followed by another ID or a VAR is taken as the
// start of a nested operator; everything else is parsed as a ternaryExpression.
expression
  : (ID (ID | VAR))=> operatorExpression
  | ternaryExpression;

// Conditional expressions, all rewritten to a "TernaryExpression" node with
// (condition, then, else) children:
//   cond ? a : b   -- 'a' may be omitted, in which case the condition tree is reused as 'then'
//   a if cond      -- EvaluationExpression.VALUE is used as the 'else' child
// The third alternative is a plain orExpression passed through unchanged.
ternaryExpression
	:	(orExpression '?')=> ifClause=orExpression '?' ifExpr=orExpression? ':' elseExpr=orExpression
	-> ^(EXPRESSION["TernaryExpression"] $ifClause { ifExpr == null ? $ifClause.tree : $ifExpr.tree } { $elseExpr.tree })
	| (orExpression IF)=> ifExpr2=orExpression IF ifClause2=orExpression
  -> ^(EXPRESSION["TernaryExpression"] $ifClause2 $ifExpr2 { EvaluationExpression.VALUE })
  | orExpression;
	
// Disjunction, written either 'or' or '||'. A single operand is returned unchanged;
// several operands are combined with OrExpression.valueOf.
orExpression
  : exprs+=andExpression ((OR | '||') exprs+=andExpression)*
  -> { $exprs.size() == 1 }? { $exprs.get(0) }
  -> { OrExpression.valueOf((List) $exprs) };
	
// Conjunction, written either 'and' or '&&'. A single operand is returned unchanged;
// several operands are combined with AndExpression.valueOf.
andExpression
  : exprs+=elementExpression ((AND | '&&') exprs+=elementExpression)*
  -> { $exprs.size() == 1 }? { $exprs.get(0) }
  -> { AndExpression.valueOf((List) $exprs) };
  
// Membership test: elem [not] in set. Without the 'in' part the element is returned
// unchanged; otherwise an "ElementInSetExpression" node is built whose quantor is
// EXISTS_NOT_IN when 'not' was given and EXISTS_IN otherwise.
elementExpression
	:	elem=comparisonExpression (not=NOT? IN set=comparisonExpression)? 
	-> { set == null }? $elem
	-> ^(EXPRESSION["ElementInSetExpression"] $elem 
	{ $not == null ? ElementInSetExpression.Quantor.EXISTS_IN : ElementInSetExpression.Quantor.EXISTS_NOT_IN} $set);
	
// Optional binary comparison (<=, >=, <, >, ==, !=) of two arithmetic expressions.
// The rewrites are tried in order: no operator -> left operand only; "!=" and "=="
// map to NOT_EQUAL / EQUAL explicitly; every other symbol is looked up with
// ComparativeExpression.BinaryOperator.valueOfSymbol.
comparisonExpression
	:	e1=arithmeticExpression ((s='<=' | s='>=' | s='<' | s='>' | s='==' | s='!=') e2=arithmeticExpression)?
	-> 	{ $s == null }? $e1
  ->  { $s.getText().equals("!=") }? ^(EXPRESSION["ComparativeExpression"] $e1 {ComparativeExpression.BinaryOperator.NOT_EQUAL} $e2)
  ->  { $s.getText().equals("==") }? ^(EXPRESSION["ComparativeExpression"] $e1 {ComparativeExpression.BinaryOperator.EQUAL} $e2)
	-> 	^(EXPRESSION["ComparativeExpression"] $e1 {ComparativeExpression.BinaryOperator.valueOfSymbol($s.text)} $e2);
	
// Addition / subtraction of two multiplicationExpressions. The operator part is
// optional (?), not repeated, so this level accepts at most one '+' or '-'.
// Without an operator the left operand is returned unchanged.
arithmeticExpression
	:	e1=multiplicationExpression ((s='+' | s='-') e2=multiplicationExpression)?
	-> 	{ s != null }? ^(EXPRESSION["ArithmeticExpression"] $e1 
		{ s.getText().equals("+") ? ArithmeticExpression.ArithmeticOperator.ADDITION : ArithmeticExpression.ArithmeticOperator.SUBTRACTION} $e2)
	-> 	$e1;
	
// Multiplication ('*') / division (SLASH) of two preincrementExpressions. As in
// arithmeticExpression the operator part is optional and not repeated.
// Without an operator the left operand is returned unchanged.
multiplicationExpression
	:	e1=preincrementExpression ((s='*' | s=SLASH) e2=preincrementExpression)?
	-> 	{ s != null }? ^(EXPRESSION["ArithmeticExpression"] $e1 
		{ s.getText().equals("*") ? ArithmeticExpression.ArithmeticOperator.MULTIPLICATION : ArithmeticExpression.ArithmeticOperator.DIVISION} $e2)
	-> 	$e1;
	
// Accepts any number of '++' / '--' prefixes in front of a unaryExpression.
// NOTE(review): the prefixed alternatives have neither an action nor a rewrite rule,
// so no increment/decrement expression is built explicitly here -- confirm intent.
preincrementExpression
	:	'++' preincrementExpression
	|	'--' preincrementExpression
	|	unaryExpression;
	
// Optional '!' or '~' prefix followed by a castExpression.
// NOTE(review): no action or rewrite is attached to the prefix -- confirm how it is handled.
unaryExpression
	:	('!' | '~')? castExpression;

/**
 * Type cast in one of two notations:
 *   (int) exp
 *   exp as int
 * Both call coerce(typeName, expression). "as" is not a lexer keyword: it is matched
 * as an ID whose text is checked by a semantic predicate. Without a cast the
 * path expression is returned unchanged.
 */
castExpression
	:	('(' ID ')')=> '(' type=ID ')' expr=generalPathExpression
  -> { coerce($type.text, $expr.tree) }
	| expr=generalPathExpression ({input.LT(1).getText().equals("as")}? ID type=ID)?
	-> { type == null ? $expr.tree : coerce($type.text, $expr.tree)};
	
// A value optionally followed by a path. If a pathExpression can follow, it is parsed
// with the value's tree as its input and the path's tree is the result; otherwise the
// value itself is the result.
generalPathExpression
	: value=valueExpression 
	  ((pathExpression[EvaluationExpression.VALUE])=> path=pathExpression[$value.tree] -> $path
	   | -> $value);

// Thin wrapper: parses a pathExpression that starts from the given context expression.
contextAwarePathExpression[EvaluationExpression context]
  : pathExpression[context];
  
// Parses a chain of path steps applied to inExp. Each alternative handles one step and,
// if another pathSegment can follow, recurses with the step's result as the new input.
// A NoViableAltException is reported through explainUsage with a usage hint.
pathExpression[EvaluationExpression inExp]
  : // safe method call: ?.name(...)
    // With further segments, the recursion receives a TernaryExpression built from the
    // not-null-or-missing check on inExp and the call. Without, a "TernaryExpression"
    // node with the children (check, call, inExp) is produced.
    ('?.' ID '(')=> '?.' call=methodCall[inExp]
      ((pathSegment)=> path=pathExpression[new TernaryExpression(new NotNullOrMissingBooleanExpression().withInputExpression(inExp), $call.tree)]-> $path | 
       -> ^(EXPRESSION["TernaryExpression"] {new NotNullOrMissingBooleanExpression().withInputExpression(inExp)} $call {inExp}))
    // normal method call: .name(...)
  | ('.' ID '(')=> '.' call=methodCall[inExp]
      ((pathSegment)=> path=pathExpression[$call.tree]-> $path | -> $call)
    // normal path expression: the segment's input expression is set to inExp
  | seg=pathSegment { ((PathSegmentExpression) seg.getTree()).setInputExpression(inExp); }
    ((pathSegment)=> path=pathExpression[$seg.tree] -> $path | -> $seg);
  catch [NoViableAltException re] { explainUsage("in a path expression only .field, ?.field, [...], and .method(...) are allowed", re); }

// One path step without an input expression attached yet:
//   ?.field  -> "TernaryExpression" (not-null-or-missing check, ObjectAccess(field), EvaluationExpression.VALUE)
//   .field   -> "ObjectAccess" with the field name
//   [ ... ]  -> see arrayAccess
// Pushes "a path expression" on the paraphrase stack for the duration of the rule.
pathSegment
@init {  paraphrase.push("a path expression"); }
@after { paraphrase.pop(); }
  : // add .field or [index] to path
    ('?.')=> '?.' field=ID -> ^(EXPRESSION["TernaryExpression"]  {new NotNullOrMissingBooleanExpression()} {new ObjectAccess($field.text)} {EvaluationExpression.VALUE})  
  | ('.') => '.' field=ID -> ^(EXPRESSION["ObjectAccess"] {$field.text})    
  | ('[') => arrayAccess;

// Bracket access:
//   [*].method(...) or [*]<segment>  -> "ArrayProjection" of the call / segment
//   [pos]                            -> "ArrayAccess" with one index
//   [start:end]                      -> "ArrayAccess" with start and end index
// Indices may be signed (INTEGER) or unsigned (UINT) and are converted with Integer.valueOf.
arrayAccess
  : '[' STAR ']' (('.' methodCall[null])=> '.' call=methodCall[EvaluationExpression.VALUE]
	  -> ^(EXPRESSION["ArrayProjection"] $call)
	  | path=pathSegment
	  -> ^(EXPRESSION["ArrayProjection"] $path)) 
  | '[' (pos=INTEGER | pos=UINT) ']' 
  -> ^(EXPRESSION["ArrayAccess"] { Integer.valueOf($pos.text) })
  | '[' (start=INTEGER | start=UINT) ':' (end=INTEGER | end=UINT) ']' 
  -> ^(EXPRESSION["ArrayAccess"] { Integer.valueOf($start.text) } { Integer.valueOf($end.text) });
  
// Primary expressions, tried in this order: function call (ID followed by '('),
// function reference (&name), inline function (wrapped as a constant FunctionNode),
// parenthesized expression, literal, variable (resolved via getInputSelection),
// named constant, array creation, object creation.
// The constant alternative is gated: it only applies when [package:]name is found
// in the constant registry of the selected scope, and it yields the registered value.
valueExpression
	:	(ID '(')=> functionCall
	| functionReference
  | (FN)=> func=inlineFunction -> ^(EXPRESSION["ConstantExpression"] { new FunctionNode($func.func) })
	| parenthesesExpression 
	| literal 
	| VAR -> { getInputSelection($VAR) }
  | ((ID ':')=> packageName=ID ':')? constant=ID { getScope($packageName.text).getConstantRegistry().get($constant.text) != null }? => 
    -> { getScope($packageName.text).getConstantRegistry().get($constant.text) }  
	| arrayCreation 
	| objectCreation;
	
// An operator used in expression position; its Operator object becomes the single
// child of a "NestedOperatorExpression" node.
operatorExpression
	:	op=operator -> ^(EXPRESSION["NestedOperatorExpression"] { $op.op });

// '(' expression ')' -- the parentheses are dropped, only the inner expression's tree remains.
parenthesesExpression
	:	('(' expression ')') -> expression;

// [package:]name(arg, arg, ...)
// Collects the argument trees and delegates to createCheckedMethodCall together with the
// optional package name, the name token and the target expression handed in by the caller
// (functionCall passes null as target).
// Pushes "a method call" on the paraphrase stack for the duration of the rule.
methodCall [EvaluationExpression targetExpr]
@init {
  // Typed list: toArray below must produce an EvaluationExpression[]
  // (on a raw List it would be typed as Object[]).
  List<EvaluationExpression> params = new ArrayList<EvaluationExpression>();
  paraphrase.push("a method call");
}
@after { paraphrase.pop(); }
  : (packageName=ID ':')? name=ID '(' 
  ((param=expression { params.add($param.tree); }) 
  (',' (param=expression { params.add($param.tree); }))*)? 
  ')' -> { createCheckedMethodCall($packageName.text, $name, $targetExpr, params.toArray(new EvaluationExpression[params.size()])) };
  
// A method call without a target expression (null is passed as target).
functionCall
	:	methodCall[null];

// &[package:]name -- looks the function up with getSopremoFunction and wraps it in a
// FunctionNode that becomes the child of a "ConstantExpression" node.
functionReference
  : '&' ((ID ':')=> packageName=ID ':')? name=ID 
        -> ^(EXPRESSION["ConstantExpression"] { new FunctionNode(getSopremoFunction($packageName.text, $name)) });	

// One entry of a json object literal. Each alternative appends a mapping to the
// enclosing objectCreation scope and produces no tree of its own:
//   field: expression     -> FieldAssignment(field, expression)
//   $var.*                -> CopyFields of the variable's input selection
//   $var<path>: expr      -> TagMapping(path, expr)
//   $var<path>            -> FieldAssignment named by getAssignmentName(path)
// A NoViableAltException is reported through explainUsage. The usage message had lost
// its first listed form (it read "only , <...>"); it now names the field form above.
fieldAssignment
	:	((ID ':')=> ID ':' expression 
    { $objectCreation::mappings.add(new ObjectCreation.FieldAssignment($ID.text, $expression.tree)); } -> )
  | VAR 
    ( '.' STAR { $objectCreation::mappings.add(new ObjectCreation.CopyFields(getInputSelection($VAR))); } ->
//      | '=' op=operator { setInnerOutput($VAR, $op.op) }?=>
      | p=contextAwarePathExpression[getVariableSafely($VAR).toInputSelection($operator::result)]
      ( ':' e2=expression { $objectCreation::mappings.add(new ObjectCreation.TagMapping($p.tree, $e2.tree)); } ->
        | /* empty */ { $objectCreation::mappings.add(new ObjectCreation.FieldAssignment(getAssignmentName($p.tree), $p.tree)); } ->
      )
    );
  catch [NoViableAltException re] { explainUsage("inside of a json object {...} only <field: expression>, <\$var.path>, <\$var = operator> or <\$var: expression> are allowed", re); }

// Json object literal: '{' fieldAssignment (',' fieldAssignment)* [','] '}' (may be empty).
// The 'mappings' scope list is filled by the fieldAssignment rule and becomes the child
// of the resulting "ObjectCreation" node. A MissingTokenException is reported through
// explainUsage. Pushes "a json object" on the paraphrase stack for the duration of the rule.
objectCreation
scope {  List mappings; }
@init { $objectCreation::mappings = new ArrayList(); 
        paraphrase.push("a json object"); }
@after { paraphrase.pop(); }
	:	'{' (fieldAssignment (',' fieldAssignment)* ','?)? '}' -> ^(EXPRESSION["ObjectCreation"] { $objectCreation::mappings });
  catch [MissingTokenException re] { explainUsage("expected <,> or <}> after a complete field assignment inside of a json object", re); }

// Literal constants. true/false, decimals (as BigDecimal), strings and integers
// (converted with the parseInt helper) are wrapped in a "ConstantExpression" node;
// 'null' yields ConstantExpression.NULL directly.
// Pushes "a literal" on the paraphrase stack for the duration of the rule.
literal
@init { paraphrase.push("a literal"); }
@after { paraphrase.pop(); }
	: val='true' -> ^(EXPRESSION["ConstantExpression"] { Boolean.TRUE })
	| val='false' -> ^(EXPRESSION["ConstantExpression"] { Boolean.FALSE })
	| val=DECIMAL -> ^(EXPRESSION["ConstantExpression"] { new BigDecimal($val.text) })
	| val=STRING -> ^(EXPRESSION["ConstantExpression"] { $val.getText() })
  | (val=UINT | val=INTEGER) -> ^(EXPRESSION["ConstantExpression"] { parseInt($val.text) })
  | 'null' -> { ConstantExpression.NULL };

// Json array literal: '[' expression (',' expression)* [','] ']' (may be empty).
// The element trees are passed as an EvaluationExpression[] (empty array when there are
// no elements) to an "ArrayCreation" node.
// Pushes "a json array" on the paraphrase stack for the duration of the rule.
arrayCreation
@init { paraphrase.push("a json array"); }
@after { paraphrase.pop(); }
	:	 '[' (elems+=expression (',' elems+=expression)* ','?)? ']' -> ^(EXPRESSION["ArrayCreation"] { $elems == null ? new EvaluationExpression[0] : $elems.toArray(new EvaluationExpression[$elems.size()]) });

/*
 * An operator is either a read operator, a write operator, or a generic operator;
 * the returned 'op' is the source, sink, or operator produced by the matched rule.
 * Each operator defines its own variable scope.
 * The dynamic scope carries 'numInputs' (incremented by the input rule) and
 * 'result' (assigned by genericOperator and read when resolving input selections).
 */
operator returns [Operator op=null]
scope { 
  int numInputs;
  Operator result;
}:	readOperator { $op = $readOperator.source; }
 |  writeOperator { $op = $writeOperator.sink; }
 |  genericOperator { $op = $genericOperator.op; }; 

// $var = [ ... ] -- creates a Source from the array literal's tree and binds its
// stream expression to the variable via putVariable.
adhocSource:
  output=VAR '=' exp=arrayCreation 
{ 
  Source source = new Source($exp.tree);
  putVariable(output, new JsonStreamExpression(source));
} -> ;

// [$out =] read [[package:]format] from ([protocol] 'path' | protocol('path')) option*
// "from" is not a lexer keyword; it is matched as an ID whose text is checked by a
// semantic predicate. The path is built with makeFilePath, the format is resolved with
// findFormat, and the resulting Source is bound to $out when an output variable is given.
// Trailing options are applied to the source through confOption.
// NOTE(review): formatInfo is declared as a raw ConfObjectInfo here although its
// newInstance() result is assigned to a SopremoFormat -- a type argument may have been
// lost from this declaration; confirm against the project sources.
readOperator returns [Source source]
@init { 
  ConfObjectInfo formatInfo = null;
  SopremoFormat fileFormat = null;
  String path = null;
}
	:	(output=VAR '=')? 
	  'read' ((packageName=ID ':')?format=ID)?
	  {input.LT(1).getText().equals("from")}? ID (protocol=ID? filePath=STRING | protocol=ID '(' filePath=STRING ')') 
{ 
  path = makeFilePath($protocol, $filePath.text);
  formatInfo = findFormat($packageName.text, format, path);
  fileFormat = formatInfo.newInstance(); 
  $source = new Source(fileFormat, path); 
  if(output != null)
    putVariable(output, new JsonStreamExpression($source));
}  confOption[getOperatorInfo($source), $source]* 
 ->;

// write [[package:]format] $input to ([protocol] 'path' | protocol('path')) option*
// "to" is not a lexer keyword; it is matched as an ID whose text is checked by a
// semantic predicate. The path is built with makeFilePath, the format is resolved with
// findFormat, the Sink reads from the stream of $input and is added to this.sinks.
// Trailing options are applied to the sink through confOption.
// A variable scope is added on entry (only when not backtracking) and removed in @after.
// NOTE(review): formatInfo is declared as a raw ConfObjectInfo although its newInstance()
// result is assigned to a SopremoFormat -- a type argument may have been lost; confirm.
writeOperator returns [Sink sink]
@init { 
  ConfObjectInfo formatInfo = null;
  SopremoFormat fileFormat = null;
  String path = null;
  if(state.backtracking == 0) 
    addScope();
}
@after {
  removeScope();
}	:	'write' 
    ((packageName=ID ':')?format=ID)? from=VAR 
	  {input.LT(1).getText().equals("to")}? ID 
	  (protocol=ID? filePath=STRING | protocol=ID '(' filePath=STRING ')')
{ 
  path = makeFilePath($protocol, $filePath.text);
  formatInfo = findFormat($packageName.text, format, path);
  fileFormat = formatInfo.newInstance();
  // path already went through makeFilePath above; reuse it as readOperator does
  // instead of applying the protocol to it a second time
  $sink = new Sink(fileFormat, path);
  $sink.setInputs(getVariableSafely(from).getStream());
  this.sinks.add($sink);
} confOption[getOperatorInfo($sink), $sink]* ->;

// [$target (, $target)* =] [package:]name [input (, input)*] option*
// inputs  - variables starting with $ (see the input rule)
// options - key value pairs of property name and value (see the confOption rule)
// The rule only applies when findOperatorGreedily resolves [package:]name to an operator
// description (gated predicate). A new operator instance is created and stored both as
// return value and as 'result' of the enclosing operator scope, a variable scope is
// added for the inputs, and the target variables are bound to the operator's outputs
// by position. The scope is removed in @after.
genericOperator returns [Operator op]
@init { 
  // The declaration had degraded to "ConfObjectInfo> operatorInfo;", which does not compile.
  // NOTE(review): the type argument is reconstructed from the leftover '>' and from the
  // assignment of newInstance() to an Operator below; confirm against findOperatorGreedily.
  ConfObjectInfo<? extends Operator<?>> operatorInfo;
}
@after {
  removeScope();
}	:	
(targets+=VAR (',' targets+=VAR)* '=')? 
(packageName=ID ':')? name=ID { (operatorInfo = findOperatorGreedily($packageName.text, $name)) != null  }?=> 
//operatorFlag[operatorInfo, $op]*
{ 
  $operator::result = $op = operatorInfo.newInstance(); 
  // add scope for input variables and recursive definition
  if(state.backtracking == 0) 
    addScope();   
} 
((VAR)=> input[operatorInfo, $op] ((',')=> ',' input[operatorInfo, $op])*)?
{ // register output names for explicit references to output 
  if($targets != null)
    for(int index = 0; index < $targets.size(); index++)
      putVariable((Token) $targets.get(index), new JsonStreamExpression($op.getOutput(index)), 1);   
} 
confOption[operatorInfo, $op]* 
->; 
	
// One configuration option: a property name followed by its value expression.
// The rule only applies when findPropertyGreedily resolves the name to a property of the
// given object (gated predicate); the value's tree is then assigned with property.setValue.
confOption [ConfObjectInfo info, ConfigurableSopremoType object]
@init {
 ConfObjectInfo.ConfObjectPropertyInfo property = null;
} : //{ findOperatorPropertyRelunctantly($genericOperator::operatorInfo, input.LT(1)) != null }?	
  name=ID
	{ (property = findPropertyGreedily(object, info, name)) != null }?=>
  expr=ternaryExpression { property.setValue(object, $expr.tree); } ->;

// One operator input: [$alias in] $source [inputProperty expression]
// The source variable's stream is wired to the next free input index of the operator
// (numInputs of the operator scope is post-incremented). When exactly one operator scope
// is active, the alias (or, without alias, the source variable name) is bound to a stream
// expression for that input index. An optional input-level property may follow; it is
// looked up with findInputPropertyRelunctantly and set for the index just assigned.
input	[ConfObjectInfo info, Operator object]
@init {
 ConfObjectInfo.ConfObjectIndexedPropertyInfo inputProperty = null;
}	:	(name=VAR IN)? from=VAR
{ 
  int inputIndex = $operator::numInputs++;
  JsonStreamExpression input = getVariableSafely(from);
  object.setInput(inputIndex, input.getStream());
  
  if($operator.size() == 1) {
	  JsonStreamExpression inputExpression = new JsonStreamExpression(input.getStream(), inputIndex);
	  putVariable(name != null ? name : from, inputExpression);
  }
} 
({ (findInputPropertyRelunctantly(object, info, input.LT(1), false) != null) }?=>
  { inputProperty = findInputPropertyRelunctantly(object, info, input.LT(1), true); }
  expr=ternaryExpression { inputProperty.setValue(object, $operator::numInputs-1, $expr.tree); })?
-> ;

/**
 * Lexer rules
 */	
// Character-class fragments; fragments never produce tokens on their own.
// 'a'..'z'
fragment LOWER_LETTER
	:	'a'..'z';

// 'A'..'Z'
fragment UPPER_LETTER
	:	'A'..'Z';

// Decimal digit; used by ID.
fragment DIGIT
	:	'0'..'9';

// Plus or minus sign. Not referenced by any rule in the original grammar text.
fragment SIGN:	('+'|'-');

// TYPE  : 'int' | 'decimal' | 'double' | 'string' | 'bool';

// Keyword tokens. They are declared before ID, whose pattern also matches these spellings.
// Introduces a Java UDF registration (see javaudf).
JAVAUDF : 'javaudf';

// Word form of '||' (see orExpression).
OR  : 'or';

// Word form of '&&' (see andExpression).
AND  : 'and';

// Postfix conditional "a if cond" (see ternaryExpression).
IF  : 'if';

// Not referenced by any parser rule visible in this file.
ELSE  : 'else';

// Negates 'in' (see elementExpression).
NOT : 'not';

// Membership test and input alias separator (see elementExpression, input).
IN  : 'in';

// Introduces an inline function (see inlineFunction).
FN  : 'fn';

// Identifier: a letter or '_' followed by letters, digits or '_'.
ID	:	(LOWER_LETTER | UPPER_LETTER | '_') (LOWER_LETTER | UPPER_LETTER | DIGIT | '_')*;

// Variable reference: '$' immediately followed by an identifier.
VAR	:	'$' ID;

// '*', used for multiplication, array projection ([*]) and copy-all-fields ($var.*).
STAR	:	'*';

// Line comments (// ...) and block comments (/* ... */); both go to the hidden channel.
// A line comment ends before the line break and does not consume it; the line break is
// left to WS. This lets a comment on the last line of the input, with no newline after
// it, lex correctly -- previously a terminating '\n' was mandatory.
COMMENT
    :   '//' ~('\n'|'\r')* {$channel=HIDDEN;}
    |   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;
    
// Division operator (see multiplicationExpression).
SLASH
    :   '/'
    ;
    
// Single quote, one of the two STRING delimiters.
fragment APOSTROPHE
  : '\'';
  
// Double quote, the other STRING delimiter.
fragment QUOTATION
  : '\"';
    
// Whitespace is skipped entirely. The last alternative of the original set was a blank
// that is either a literal non-breaking space or a duplicate of ' '; it is written as
// an explicit escape so that it is visible and the set contains no duplicate.
WS 	:	(' '|'\t'|'\n'|'\r'|'\u00A0')+ { skip(); };
    
// String literal in double or single quotes. The match is non-greedy, so the literal
// ends at the first following quote of the same kind; ESC_SEQ is not referenced here,
// i.e. a quote cannot be escaped inside the literal. The token text is set to the
// content without the surrounding quotes.
STRING
	:	(QUOTATION (options {greedy=false;} : .)* QUOTATION | APOSTROPHE (options {greedy=false;} : .)* APOSTROPHE)
	{ setText(getText().substring(1, getText().length()-1)); };

// Backslash escape: a single-character escape, a unicode escape or an octal escape.
// Not referenced by any rule visible in this file.
fragment
ESC_SEQ
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   UNICODE_ESC
    |   OCTAL_ESC
    ;

// Octal escape with one to three octal digits; in the three-digit form the first digit is limited to 0..3.
fragment
OCTAL_ESC
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Unicode escape: backslash, 'u' and exactly four hex digits.
fragment
UNICODE_ESC	:   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT   ;
    
// One hexadecimal digit, upper or lower case.
fragment
HEX_DIGIT
    :   DIGIT
    |   'a'..'f'
    |   'A'..'F'
    ;


// Unsigned integer: one or more decimal digits.
UINT
    :   DIGIT+
    ;
    
// Integer with an optional leading sign.
INTEGER
    :   SIGN? UINT
    ;

// Decimal number in one of three shapes:
//   digits '.' [digits] [exponent]
//   '.' digits [exponent]
//   digits exponent
DECIMAL
    :   DIGIT+ '.' DIGIT* EXPONENT?
    |   '.' DIGIT+ EXPONENT?
    |   DIGIT+ EXPONENT
    ;

// Exponent part of a DECIMAL: 'e' or 'E', an optional sign, and at least one digit.
fragment
EXPONENT
    :   ('e' | 'E') SIGN? DIGIT+
    ;





© 2015 - 2025 Weber Informatics LLC | Privacy Policy