// All downloads are free. The search and download functionality uses the official Maven repository.
//
// it.uniroma2.art.coda.pearl.parser.antlr4.Pearl.g4 (Maven / Gradle / Ivy)
//
// The newest version!
// Combined ANTLR 4 grammar named "Pearl": parser rules (names starting with a
// lower-case letter) and lexer rules (upper-case names) live in this one file.
grammar Pearl;

// Generate the lexer and parser for the Java target.
options {
  language = Java;
}


// Entry rule of the grammar: a prologue, an optional group of annotation
// definitions, then any number (possibly zero) of base rules and regex rules
// in any order.
// The rule ends with EOF so that the parser must consume the whole input:
// without it, anything following the last recognisable rule would be
// silently ignored instead of being reported as a syntax error.
pearlUnit
  :
  prologue annotationsDefinition? (baseRule | regex)* EOF
  ;
  
// Prologue: zero or more prefix declarations.
prologue
    : prefixDecl*
    ;

// One prefix declaration: the keyword (all lower-case or all upper-case),
// a namespace token (PNAME_NS, i.e. an optional prefix followed by ':')
// and an IRI in angle brackets.
prefixDecl
    : ( 'prefix' | 'PREFIX' ) PNAME_NS IRIREF
    ;
  
// One or more annotation definitions, written one after the other with no
// enclosing wrapper.
annotationsDefinition
    : singleAnnotationDefinition+
    ;

// A single annotation definition: optional meta-annotations, the keyword
// 'Annotation' or 'ANNOTATION', the annotation name and, optionally, a
// brace-delimited (possibly empty) list of parameter definitions.
singleAnnotationDefinition
    : metaAnnotation*
      ( 'Annotation' | 'ANNOTATION' )
      annotationName=JAVA_IDENTIFIER
      ( '{' paramsDefinition* '}' )?
    ;

// A meta-annotation placed in front of an annotation definition: an '@name'
// token optionally followed by parenthesised parameters.
metaAnnotation
    : annotationName=LANGTAGORANNOTATION ( '(' singleParamOrListOfNameParam ')' )?
    ;

// Parameters passed to an annotation: either one unnamed value (or braced
// value list), or a comma-separated sequence of `name = value` pairs.
singleParamOrListOfNameParam
    : singleP=singleParamOrListOfParam
    | names+=JAVA_IDENTIFIER '=' params+=singleParamOrListOfParam
      ( ',' names+=JAVA_IDENTIFIER '=' params+=singleParamOrListOfParam )*
    ;

// A single value, or a non-empty comma-separated list of values in braces.
singleParamOrListOfParam
    : value=singleParam
    | '{' param+=singleParam ( ',' param+=singleParam )* '}'
    ;

// One parameter value: an identifier, an integer, a double, a placeholder,
// an IRI or a literal.
singleParam
    : valueStr=JAVA_IDENTIFIER
    | INTEGER
    | DOUBLE
    | plch=placeholder
    | iri
    | literal
    ;

// Declaration of one annotation parameter: its type, its name followed by
// '(' ')', an optional 'default' value and a terminating ';'.
paramsDefinition
    : paramType paramName=JAVA_IDENTIFIER '(' ')' ( 'default' defaultValue )? ';'
    ;

// Parameter type: an identifier, optionally followed by the token '[]'
// (a single token, so no whitespace is accepted between the brackets).
paramType
    : JAVA_IDENTIFIER '[]'?
    ;

// Default value of a parameter: an identifier, an integer, a double, an IRI
// or a literal.
defaultValue
    : valueStr=JAVA_IDENTIFIER
    | INTEGER
    | DOUBLE
    | iri
    | literal
    ;

// A rule of any of the three kinds, preceded by zero or more annotations.
baseRule
    : annotation* ( standardRule | lazyRule | forRegexRule )
    ;

// A standard rule: keyword, type name, rule id, an optional comma-separated
// dependency list and a braced body. Every clause of the body is optional,
// the clauses must appear in the order given here, and at most one of the
// insert / graph clauses may be present.
standardRule
    : ( 'rule' | 'RULE' ) uimaTypeName ruleId
      ( ( 'dependsOn' | 'DEPENDSON' | 'dependson' ) depend ( ',' depend )* )?
      '{'
          conditionClause?
          bindingsClause?
          nodesClause?
          ( insertClause | graphClause )?
          deleteClause?
          whereClause?
      '}'
    ;
  
  
  
// A lazy rule: the keyword 'lazy', the rule keyword, a type name, a rule id
// and a braced body holding exactly one nodes clause.
// The rule keyword is accepted as 'rule' or 'RULE', matching standardRule and
// forRegexRule; both literals are already tokens of this grammar, so no new
// reserved word is introduced.
lazyRule
  :
  'lazy' ('rule'|'RULE') uimaTypeName ruleId '{'
    nodesClause
  '}'
  ;
  
  
// A "forRegex" rule: the forRegex keyword (three accepted spellings), the
// rule keyword, a type name, a rule id and a braced body made of an optional
// conditions clause followed by a mandatory nodes clause.
forRegexRule
    : ( 'forRegex' | 'forregex' | 'FORREGEX' ) ( 'rule' | 'RULE' ) uimaTypeName ruleId
      '{'
          conditionClause?
          nodesClause
      '}'
    ;

// Type name used in a rule header: a dot-separated qualified name.
uimaTypeName
    : javaQualifiedName
    ;


// The conditions clause: keyword, '=', and one or more condition
// definitions in braces.
conditionClause
    : ( 'conditions' | 'CONDITIONS' ) '='
      '{'
          conditionDef+
      '}'
    ;

// One condition: a feature path, a condition operator, a non-empty
// bracketed comma-separated list of strings and a closing '.'.
conditionDef
    : featurePath CONDITIONOPERATOR
      '[' values+=string ( ',' values+=string )* ']'
      '.'
    ;

// One dependency of a rule: a dependency type, a parenthesised non-empty
// comma-separated mix of rule identifiers and `name = value` parameters,
// then 'as' / 'AS' and the name under which the dependency is referenced.
depend
    : dependType=JAVA_IDENTIFIER
      '('
          ( depRuleIds+=JAVA_IDENTIFIER | params+=dependParam )
          ( ',' ( depRuleIds+=JAVA_IDENTIFIER | params+=dependParam ) )*
      ')'
      ( 'as' | 'AS' ) depRuleIdAs=JAVA_IDENTIFIER
    ;

// A dependency parameter: name, '=', and a value that is either an
// identifier or an integer.
dependParam
    : name=JAVA_IDENTIFIER '=' ( value=JAVA_IDENTIFIER | value=INTEGER )
    ;
	

// The bindings clause: keyword, '=', and one or more binding definitions in
// braces.
bindingsClause
    : ( 'bindings' | 'BINDINGS' ) '='
      '{'
          bindingDef+
      '}'
    ;

// One binding: the binding identifier, a feature path and the identifier of
// the rule being bound.
bindingDef
    : bindingId=JAVA_IDENTIFIER featurePath bindingRuleId=JAVA_IDENTIFIER
    ;

// Parser-level wrapper around the RULE_ID token.
ruleId
    : RULE_ID
    ;

// Rule identifier token: 'id:' or 'ID:' immediately followed by an
// identifier. Being a lexer rule, it allows no whitespace after the colon.
RULE_ID
    : ( 'id:' | 'ID:' ) JAVA_IDENTIFIER
    ;

// The nodes clause: keyword, '=', and one or more node definitions in braces.
nodesClause
    : ( 'nodes' | 'NODES' ) '='
      '{'
          nodeDef+
      '}'
    ;

// One node definition: optional annotations, the node name, a projection
// operator and then either a feature path (with an optional trailing '.')
// or a lone '.'.
nodeDef
    : annotation* nodeName=JAVA_IDENTIFIER projectionOperator
      ( featurePath '.'? | '.' )
    ;

// An annotation: an '@name' token optionally followed by parenthesised
// parameters.
annotation
    : annotationName=LANGTAGORANNOTATION ( '(' singleParamOrListOfNameParam ')' )?
    ;
  
// Projection operator of a node definition. Three forms are accepted, each
// optionally followed by a converter list:
//   - 'uri'
//   - 'literal', optionally qualified by '^^' plus a datatype IRI or by an
//     '@...' token
//   - a %NAME% replacement placeholder
projectionOperator
    : type='uri' converters?
    | type='literal' ( '^^' iri | LANGTAGORANNOTATION )? converters?
    | rep_plc=REP_PLC converters?
    ;
  
// A parenthesised, non-empty, comma-separated list of converters.
converters
    : '(' individualConverter ( ',' individualConverter )* ')'
    ;

// One converter: an IRI or a %NAME% replacement placeholder, optionally
// followed by additional arguments.
individualConverter
    : iri converterAdditionalArguments?
    | rep_plc=REP_PLC converterAdditionalArguments?
    ;

// Parenthesised, possibly empty, comma-separated argument list of a converter.
converterAdditionalArguments
    : '(' ( converterArgumentExpression ( ',' converterArgumentExpression )* )? ')'
    ;

// A converter argument: literal, IRI, '$name' placeholder or map.
converterArgumentExpression
    : literal
    | iri
    | converterPlaceholderArgument
    | converterMapArgument
    ;

// A '$name' token used as a converter argument.
converterPlaceholderArgument
    : VAR2
    ;

/*
converterLiteralArgument
  :	  	
  converterStringLiteralArgument
  ;
   
converterStringLiteralArgument
  :	
  string
  //-> ^(AST_CONVERTER_STRING_LITERAL_ARGUMENT string)
  ;
*/

// A map argument: a brace-delimited, possibly empty, comma-separated list of
// entries.
converterMapArgument
    : '{' ( mapEntry ( ',' mapEntry )* )? '}'
    ;

// One map entry: a key, '=', and a value that is a literal, an IRI or a
// '$name' placeholder.
mapEntry
    : JAVA_IDENTIFIER '=' ( literal | iri | converterPlaceholderArgument )
    ;

// A feature path: one or more path elements separated by '/', or a single
// %NAME% replacement placeholder.
featurePath
    : featurePathElement ( '/' featurePathElement )*
    | rep_plc=REP_PLC
    ;

// One path element: an identifier with an optional integer index in
// square brackets.
featurePathElement
    : JAVA_IDENTIFIER ( '[' INTEGER ']' )?
    ;


// The graph clause: keyword, '=', and a braced graph.
graphClause
    : ( 'graph' | 'GRAPH' ) '=' graph
    ;

// The insert clause: keyword, '=', and a braced graph.
insertClause
    : ( 'insert' | 'INSERT' ) '=' graph
    ;

// The delete clause: keyword, '=', and a braced graph.
deleteClause
    : ( 'delete' | 'DELETE' ) '=' graph
    ;

// A qualified name: identifiers separated by '.'.
javaQualifiedName
    : JAVA_IDENTIFIER ( '.' JAVA_IDENTIFIER )*
    ;

// A graph: one or more graph elements enclosed in braces.
graph
    : '{' graphElement+ '}'
    ;

// A graph element: a triple preceded by zero or more annotations, or an
// optional group.
graphElement
    : annotation* graphTriple
    | optionalGraphElement
    ;

// An optional group: keyword followed by one or more graph elements in
// braces (groups can therefore nest).
optionalGraphElement
    : ( 'optional' | 'OPTIONAL' ) '{' graphElement+ '}'
    ;

// A triple: subject, predicate, object and a terminating '.'.
graphTriple
    : graphSubject graphPredicate graphObject '.'
    ;

// Subject of a triple.
graphSubject
    : var
    | iri
    | blankNode
    | placeholder
    | rep_plc=REP_PLC
    ;

// Predicate of a triple. A bare IRI or a bare 'a' matches both the iri/abbr
// alternatives and propPath; they are listed first, so ANTLR resolves the
// ambiguity in their favour.
graphPredicate
    : var
    | iri
    | abbr
    | propPath
    | placeholder
    | rep_plc=REP_PLC
    ;

// Object of a triple.
graphObject
    : var
    | iri
    | literal
    | blankNode
    | placeholder
    | rep_plc=REP_PLC
    ;

// Property paths. They are meant to be used ONLY as the predicate of triples in the WHERE section. The grammar
// itself also admits them in every other graphTriple (for example in the GRAPH section), so the code that
// consumes the parse tree should check that a property path appears only in the WHERE section.
// The productions below are taken from https://www.w3.org/TR/sparql11-query/
/*
[88]  	Path	  ::=  	PathAlternative
[89]  	PathAlternative	  ::=  	PathSequence ( '|' PathSequence )*
[90]  	PathSequence	  ::=  	PathEltOrInverse ( '/' PathEltOrInverse )*
[91]  	PathElt	  ::=  	PathPrimary PathMod?
[92]  	PathEltOrInverse	  ::=  	PathElt | '^' PathElt
[93]  	PathMod	  ::=  	'?' | '*' | '+'
[94]  	PathPrimary	  ::=  	iri | 'a' | '!' PathNegatedPropertySet | '(' Path ')'
[95]  	PathNegatedPropertySet	  ::=  	PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
[96]  	PathOneInPropertySet	  ::=  	iri | 'a' | '^' ( iri | 'a' )

*/

// Entry point of a property path.
propPath
    : pathAlternative
    ;

// Alternatives separated by '|'.
pathAlternative
    : pathSequence ( '|' pathSequence )*
    ;

// Steps separated by '/'.
pathSequence
    : pathEltOrInverse ( '/' pathEltOrInverse )*
    ;

// A primary path with an optional modifier. The modifier reuses the
// REGEX_SYMBOL token ('+', '?', '*') that already exists in this grammar
// instead of declaring a separate PATHMOD token.
pathElt
    : pathPrimary REGEX_SYMBOL?
    ;

// A path element, or a path element preceded by '^'.
pathEltOrInverse
    : pathElt
    | '^' pathElt
    ;

// Primary path: an IRI, the keyword 'a', a negated property set introduced
// by '!', or a parenthesised path.
pathPrimary
    : iri
    | 'a'
    | '!' pathNegatedPropertySet
    | '(' pathAlternative ')'
    ;

// Negated property set: a single member, or a parenthesised list of members
// separated by '|'. Unlike the SPARQL production, the parenthesised list
// must contain at least one member.
pathNegatedPropertySet
    : pathOneInPropertySet
    | '(' pathOneInPropertySet ( '|' pathOneInPropertySet )* ')'
    ;

// One member of a negated property set: an IRI or 'a', optionally preceded
// by '^'.
pathOneInPropertySet
    : iri
    | 'a'
    | '^' ( iri | 'a' )
    ;

// end new part about PropertyPath



// A variable: a '?name' token.
var
    : VAR1
    ;

// A placeholder: a '$name' token, either alone or followed by '.' or '..'
// and an identifier. The separator that was used is exposed through the
// `separator` label.
placeholder
    : VAR2
    | VAR2 separator='.' JAVA_IDENTIFIER
    | VAR2 separator='..' JAVA_IDENTIFIER
    ;
  
// An IRI: either written in full between angle brackets or as a prefixed name.
iri
    : IRIREF
    | prefixedName
    ;

// A literal: a quoted string, optionally followed by an '@...' token or by
// '^^' and a datatype IRI.
literal
    : string ( LANGTAGORANNOTATION | '^^' iri )?
    ;

// A single-quoted or double-quoted string.
string
    : STRING_LITERAL1
    | STRING_LITERAL2
    ;

// A blank node label ('_:' followed by a local name).
blankNode
    : BLANK_NODE_LABEL
    ;

// A prefixed name: prefix plus local part, or the namespace part alone.
prefixedName
    : PNAME_LN
    | pn=PNAME_NS
    ;

// The abbreviation keyword 'a', also accepted as 'A'.
abbr
    : 'a'
    | 'A'
    ;
  
  
// Not perfect: JAVA_IDENTIFIER also accepts '_', which is not allowed in a language tag.
//langtag
//  :
//  AT (JAVA_IDENTIFIER)+ ('-' (JAVA_IDENTIFIER)+)*
//  ;
  

// The where clause: keyword, '=', and a braced graph.
whereClause
    : ( 'where' | 'WHERE' ) '=' graph
    ;
  
  
// Rules for the regex construct

 
// A regex definition: keyword, rule id, a pattern, '->' and a graph clause.
regex
    : ( 'regex' | 'REGEX' ) ruleId regexPattern '->' graphClause
    ;

// The pattern of a regex definition.
regexPattern
    : regexWithOr
    ;

// Alternatives separated by '|'.
regexWithOr
    : regexSequenceElement ( '|' regexSequenceElement )*
    ;

// A sequence of one or more base elements.
regexSequenceElement
    : regexBaseElementWithSymbol+
    ;

// A base element with an optional '+', '?' or '*' suffix.
regexBaseElementWithSymbol
    : regexBaseElement REGEX_SYMBOL?
    ;

// A base element: either a bracketed reference of the form
// `[ maxDist? ruleId as internalId ]`, or a parenthesised sub-pattern.
regexBaseElement
    : '[' maxDist=INTEGER? regexRuleId=JAVA_IDENTIFIER ( 'as' | 'AS' ) internalId=JAVA_IDENTIFIER ']'
    | '(' regexWithOr ')'
    ;

// Suffix token: one of '+', '?', '*'.
REGEX_SYMBOL
    : '+'
    | '?'
    | '*'
    ;


// end new regex part 

// Runs of spaces, tabs, form feeds and carriage returns are discarded.
WS
    : [ \t\f\r]+ -> skip
    ;

// A line feed is discarded as well.
NEWLINE
    : '\n' -> skip
    ;
// Line comment: '//' and everything up to, but not including, the end of the
// line. A negated character set is used instead of requiring a terminating
// '\n' or '\r', so a comment on the very last line of the input is still
// recognised when no line terminator follows it. The line terminator itself
// is discarded by the WS / NEWLINE rules.
COMMENT: '//' ~[\r\n]* -> skip;
// Block comment: everything from '/*' up to the first following '*/'
// (non-greedy match, so block comments do not nest) is discarded.
MULTILINE_COMMENT:  '/*' .*? '*/' -> skip;


 
// An IRI in angle brackets. The characters < > " { } | ^ ` and everything in
// the range U+0000..U+0020 are not allowed inside.
IRIREF
    : '<'
      ( ~( '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\u0000'..'\u0020' ) )*
      '>'
    ;

// '?' immediately followed by a name.
VAR1
    : '?' VARNAME
    ;

// '$' immediately followed by a name.
VAR2
    : '$' VARNAME
    ;

// Namespace part of a prefixed name: an optional prefix followed by ':'.
PNAME_NS
    : PN_PREFIX? ':'
    ;

// Full prefixed name: namespace part followed by a local name.
PNAME_LN
    : PNAME_NS PN_LOCAL
    ;

// Blank node label: '_:' followed by a local name.
BLANK_NODE_LABEL
    : '_:' PN_LOCAL
    ;

/*LANGTAG // old version 
	:
	AT ('a'..'z'|'A'..'Z')+ ('-' ('a'..'z'|'A'..'Z'|'0'..'9')+)*
	;*/

// Token shared by language tags and annotation names: '@', an identifier,
// then zero or more '-'-separated subtags.
// A subtag is any non-empty run of letters, digits and '_', so language tags
// with numeric subtags (for example @es-419 or @de-CH-1996) are recognised.
// Requiring every subtag to be a JAVA_IDENTIFIER rejected them, because an
// identifier cannot start with a digit. Every input matched by the previous
// definition is still matched.
LANGTAGORANNOTATION
	:
	AT JAVA_IDENTIFIER ('-' JAVA_LETTER_OR_DIGIT+)*
	;


// The '@' sign.
AT
    : '@'
    ;

// Single-quoted string: any character except the quote itself, a backslash,
// a line feed or a carriage return, or one of the ECHAR escape sequences.
STRING_LITERAL1
    : '\'' ( ~( '\u0027' | '\u005C' | '\u000A' | '\u000D' ) | ECHAR )* '\''
    ;

// Double-quoted string, with the same rules as the single-quoted form.
STRING_LITERAL2
    : '"' ( ~( '\u0022' | '\u005C' | '\u000A' | '\u000D' ) | ECHAR )* '"'
    ;
	


// Operator used in condition definitions: IN or NOT IN, either all
// upper-case or all lower-case.
// The two words of NOT IN may be separated by any non-empty run of spaces
// and tabs. Written as the literal 'NOT IN', the operator matched only with
// exactly one space character, so e.g. a tab or two spaces between the words
// made it unrecognisable.
// Note: because this rule precedes JAVA_IDENTIFIER, the words 'IN' and 'in'
// are always lexed as this token and never as identifiers.
CONDITIONOPERATOR
	:
	'IN' | 'in' | 'NOT' [ \t]+ 'IN' | 'not' [ \t]+ 'in'
	;


// Replacement placeholder: one or more letters / underscores between two
// '%' signs.
REP_PLC
    : '%' JAVA_LETTER+ '%'
    ;

// Escape sequence allowed inside strings: a backslash followed by one of
// t b n r f \ ' "
fragment ECHAR
    : '\\' [tbnrf\\'"]
    ;

// Local part of a prefixed name. The first character is a PN_CHARS_U, ':',
// a digit or a PLX escape; subsequent characters may additionally be '-',
// '.' and the other PN_CHARS, but the last character cannot be '.'.
fragment
PN_LOCAL
  :
  (PN_CHARS_U | ':' | '0'..'9' | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
  ;

// An escape inside a local name: percent-encoding or backslash escape.
fragment
PLX
  :
  PERCENT | PN_LOCAL_ESC
  ;

// Percent-encoded character: '%' followed by exactly two hex digits.
fragment
PERCENT
  :
  '%' HEX HEX
  ;
  
// One hexadecimal digit, upper or lower case.
fragment
HEX
  :
  '0'..'9' | 'A'..'F' | 'a'..'f'
  ;

// Backslash escape inside a local name: a backslash followed by one of the
// punctuation characters listed below.
fragment
PN_LOCAL_ESC
  :
  '\\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?'
  | '#' | '@' | '%' )
  ;
// Prefix of a prefixed name: starts with a PN_CHARS_BASE character and, when
// longer than one character, continues and ends with PN_CHARS characters.
// '.' is deliberately not accepted inside the prefix (see the inline note).
fragment
PN_PREFIX
  :
  PN_CHARS_BASE ((PN_CHARS/*|'.'*/)* PN_CHARS)? // Dot removed since it causes a bug in the generated Lexer
  ;

// Base character set for names: ASCII letters plus the listed Unicode
// ranges, including the supplementary range U+10000..U+EFFFF.
fragment
PN_CHARS_BASE
  :
  'A'..'Z' | 'a'..'z'| '\u00C0'..'\u00D6' | '\u00D8'..'\u00F6'| '\u00F8'..'\u02FF' | '\u0370'..'\u037D'|
  '\u037F'..'\u1FFF' | '\u200C'..'\u200D'| '\u2070'..'\u218F' | '\u2C00'..'\u2FEF' | '\u3001'..'\uD7FF' |
  '\uF900'..'\uFDCF'  | '\uFDF0'..'\uFFFD' | '\u{10000}'..'\u{EFFFF}'
  ;

// Base characters plus the underscore.
fragment
PN_CHARS_U
  :
  PN_CHARS_BASE | '_'
  ;

// Characters allowed after the first position of a name: PN_CHARS_U, '-',
// digits, U+00B7 and the two listed combining / connector ranges.
fragment
PN_CHARS
  :
  PN_CHARS_U | '-' | '0'..'9' | '\u00B7' | '\u0300'..'\u036F' | '\u203F'..'\u2040'
  ;


// Name part of a '?name' / '$name' token: same shape as an identifier.
fragment VARNAME
    : JAVA_IDENTIFIER
    ;

// Identifier: a letter or underscore followed by letters, underscores and
// digits.
JAVA_IDENTIFIER
    : JAVA_LETTER JAVA_LETTER_OR_DIGIT*
    ;

// ASCII letter or underscore.
fragment JAVA_LETTER
    : [a-zA-Z_]
    ;

// ASCII letter, underscore or decimal digit.
fragment JAVA_LETTER_OR_DIGIT
    : JAVA_LETTER
    | [0-9]
    ;

// Unsigned integer: one or more decimal digits.
INTEGER
    : [0-9]+
    ;

// Unsigned decimal number: digits, '.', digits (both parts are mandatory).
DOUBLE
    : INTEGER '.' INTEGER
    ;





// © 2015 - 2024 Weber Informatics LLC | Privacy Policy