// it.uniroma2.art.coda.pearl.parser.antlr4.Pearl.g4 (newest published version)
// Combined (parser + lexer) grammar named Pearl; code is generated for the Java target.
grammar Pearl;

options {
    language = Java;
}
// Top-level rule of a document (no other rule in this grammar references it): the prologue,
// an optional block of annotation definitions, then any number of rules and regexes.
//
// The trailing EOF forces the parser to consume the whole input. Without it the parser stops
// silently at the first token that cannot start a baseRule or a regex, and everything that
// follows is ignored instead of being reported as a syntax error.
//
// A commented-out earlier form required at least one rule/regex: (r+=baseRule | r+=regex)+
// Former tree rewrite: ^(AST_PEARL_UNIT prologue annotationsDefinition? $r+)
pearlUnit
    : prologue annotationsDefinition? (baseRule | regex)* EOF
    ;
// Zero or more prefix declarations.
// Former tree rewrite: ^(AST_PROLOGUE prefixDecl*)
prologue
    : prefixDecl*
    ;
// A single prefix declaration: the keyword (all lower-case or all upper-case), a namespace
// token (optional prefix followed by ':') and an IRI reference.
// Former tree rewrite: ^(AST_PREFIX_DECLARATION PNAME_NS IRIREF)
prefixDecl
    : ('prefix' | 'PREFIX') PNAME_NS IRIREF
    ;
// One or more annotation definitions, written one after the other.
// A commented-out earlier form wrapped them in:  'annotations' '=' '{' ... '}'
// Former tree rewrite: ^(AST_ANNOTATION_DECLARATIONS singleAnnotationDefinition+)
annotationsDefinition
    : singleAnnotationDefinition+
    ;
// Definition of one annotation: optional meta-annotations, the keyword, the annotation name
// and an optional brace-delimited list of parameter definitions.
// Former tree rewrite: ^(AST_ANNOTATION_DEFINITION $annotationName metaAnnotation*)
singleAnnotationDefinition
    : metaAnnotation*
      ('Annotation' | 'ANNOTATION') annotationName=JAVA_IDENTIFIER
      ('{' paramsDefinition* '}')?
    ;
// An annotation placed on an annotation definition: an '@name' token, optionally followed by
// parenthesised arguments. Same shape as the 'annotation' rule.
// Former tree rewrite: ^(AST_META_ANNOTATION $annotationName $splnv*)
metaAnnotation
    : annotationName=LANGTAGORANNOTATION ('(' singleParamOrListOfNameParam ')')?
    ;
// Arguments of an annotation, in one of two forms:
//   - a single unnamed value (or brace-delimited list of values), labelled singleP;
//   - a comma-separated list of  name '=' value  pairs, collected in names / params.
singleParamOrListOfNameParam
    : singleP=singleParamOrListOfParam
    | names+=JAVA_IDENTIFIER '=' params+=singleParamOrListOfParam
      (',' names+=JAVA_IDENTIFIER '=' params+=singleParamOrListOfParam)*
    ;
/*singleParamOrListOfNameValue
:
singleParamOrListOfParam
//-> ^(AST_ANNOTATION_PARAM singleParamOrListOfParam)
|
(name=JAVA_IDENTIFIER '=' value=singleParamOrListOfParam)
//-> ^(AST_ANNOTATION_PARAM $name $value)
;
*/
// Either one value, or a non-empty, comma-separated list of values between braces.
// Former tree rewrites: $value  /  ^(AST_ANNOTATION_MULTI_VALUE $param+)
singleParamOrListOfParam
    : value=singleParam
    | '{' param+=singleParam (',' param+=singleParam)* '}'
    ;
// One annotation argument value: identifier, integer, double, placeholder, IRI or literal.
// Only the identifier (valueStr) and the placeholder (plch) alternatives are labelled.
singleParam
    : valueStr=JAVA_IDENTIFIER
    | INTEGER
    | DOUBLE
    | plch=placeholder
    | iri
    | literal
    ;
// Declaration of one annotation parameter:  type name() [default value] ;
paramsDefinition
    : paramType paramName=JAVA_IDENTIFIER '(' ')' ('default' defaultValue)? ';'
    ;
// Type of an annotation parameter: an identifier, optionally followed by '[]'.
// '[]' is a single literal token, so no whitespace may appear between the two brackets.
paramType
    : JAVA_IDENTIFIER '[]'?
    ;
// Default value of an annotation parameter: identifier, integer, double, IRI or literal.
// Unlike singleParam, a placeholder is not allowed here.
defaultValue
    : valueStr=JAVA_IDENTIFIER
    | INTEGER
    | DOUBLE
    | iri
    | literal
    ;
// Any kind of rule (standard, lazy or forRegex), preceded by zero or more annotations.
// Former tree rewrite: $r
baseRule
    : annotation* (standardRule | lazyRule | forRegexRule)
    ;
// A standard rule: keyword, type name, rule id, an optional list of dependencies and a
// brace-delimited body. Every clause of the body is optional, but the clauses must appear in
// the order listed; at most one of insertClause / graphClause may be present.
// Former tree rewrite:
//   ^(AST_RULE ^(AST_ID ruleId) uimaTypeName ^(AST_DEPENDSON_LIST depend+)? conditionClause?
//     bindingsClause? nodesClause? insertClause? graphClause? deleteClause? whereClause?)
standardRule
    : ('rule' | 'RULE') uimaTypeName ruleId
      (('dependsOn' | 'DEPENDSON' | 'dependson') depend (',' depend)*)?
      '{'
          conditionClause?
          bindingsClause?
          nodesClause?
          (insertClause | graphClause)?
          deleteClause?
          whereClause?
      '}'
    ;
// A lazy rule: 'lazy', the rule keyword, type name, rule id and a body that contains exactly
// one nodes clause.
//
// The rule keyword accepts both 'rule' and 'RULE', consistently with standardRule and
// forRegexRule (previously only the lower-case form was accepted here). 'RULE' is already a
// keyword of this grammar, so no new token is introduced. 'lazy' is deliberately left
// lower-case only: adding an upper-case variant would reserve a new keyword and could change
// how existing identifiers are tokenized.
// Former tree rewrite: ^(AST_LAZY_RULE ^(AST_ID ruleId)? uimaTypeName nodesClause?)
lazyRule
    : 'lazy' ('rule' | 'RULE') uimaTypeName ruleId
      '{'
          nodesClause
      '}'
    ;
// A forRegex rule: the forRegex keyword, the rule keyword, type name, rule id and a body made
// of an optional conditions clause followed by a mandatory nodes clause.
// Former tree rewrite: ^(AST_FORREGEX_RULE ^(AST_ID ruleId)? uimaTypeName conditionClause? nodesClause?)
forRegexRule
    : ('forRegex' | 'forregex' | 'FORREGEX') ('rule' | 'RULE') uimaTypeName ruleId
      '{'
          conditionClause?
          nodesClause
      '}'
    ;
// Type name a rule refers to, written as a dot-separated qualified name.
// Former tree rewrite: ^(AST_UIMA_TYPE_NAME javaQualifiedName)
uimaTypeName
    : javaQualifiedName
    ;
// The  conditions = { ... }  clause: one or more condition definitions.
// Former tree rewrite: ^(AST_CONDITIONS_CLAUSE conditionDef+)
conditionClause
    : ('conditions' | 'CONDITIONS') '=' '{' conditionDef+ '}'
    ;
// One condition: a feature path, an operator token, a non-empty bracketed list of
// comma-separated strings, and a closing '.'.
// Former tree rewrite: ^(AST_CONDITION_DEF featurePath CONDITIONOPERATOR ^(AST_STRING_LIST $value+))
conditionDef
    : featurePath CONDITIONOPERATOR
      '[' values+=string (',' values+=string)* ']'
      '.'
    ;
// One dependency of a rule:  type ( item, item, ... ) as alias
// Each item is either a bare identifier (collected in depRuleIds) or a  name = value
// parameter (collected in params); the two kinds may be freely mixed.
// Former tree rewrite:
//   ^(AST_DEPENDSON $dependType ^(AST_DEPENDSONRULEIDS $depRuleIds*) ^(AST_DEPENDSONPARAMS $params*) $depRuleIdAs)
depend
    : dependType=JAVA_IDENTIFIER
      '('
          (depRuleIds+=JAVA_IDENTIFIER | params+=dependParam)
          (',' (depRuleIds+=JAVA_IDENTIFIER | params+=dependParam))*
      ')'
      ('as' | 'AS') depRuleIdAs=JAVA_IDENTIFIER
    ;
// A named dependency parameter whose value is an identifier or an integer.
// Former tree rewrite: ^(AST_DEPENDSONSINGLEPARAM $name '=' $value)
dependParam
    : name=JAVA_IDENTIFIER '=' (value=JAVA_IDENTIFIER | value=INTEGER)
    ;
// The  bindings = { ... }  clause: one or more binding definitions.
// Former tree rewrite: ^(AST_BINDINGS_CLAUSE bindingDef+)
bindingsClause
    : ('bindings' | 'BINDINGS') '=' '{' bindingDef+ '}'
    ;
// One binding: an identifier for the binding, a feature path and the identifier of a rule.
// Former tree rewrite: ^(AST_BINDING_DEFINITION $bindingId featurePath $bindingRuleId)
bindingDef
    : bindingId=JAVA_IDENTIFIER featurePath bindingRuleId=JAVA_IDENTIFIER
    ;
// Identifier of a rule or regex; parser-level wrapper around the RULE_ID token.
ruleId
    : RULE_ID
    ;
// Token for a rule identifier: 'id:' or 'ID:' immediately followed by an identifier.
// Being a single lexer token, no whitespace is allowed between the prefix and the identifier.
RULE_ID
    : ('id:' | 'ID:') JAVA_IDENTIFIER
    ;
// The  nodes = { ... }  clause: one or more node definitions.
// Former tree rewrite: ^(AST_NODES_CLAUSE nodeDef+)
nodesClause
    : ('nodes' | 'NODES') '=' '{' nodeDef+ '}'
    ;
//working version
/*nodeDef
:
JAVA_IDENTIFIER projectionOperator featurePath
-> ^(AST_NODES_DEF JAVA_IDENTIFIER projectionOperator featurePath)
;
*/
// One node definition: optional annotations, the node name, a projection operator and then
// either a feature path (optionally followed by '.') or a lone '.'.
// Former tree rewrite: ^(AST_NODES_DEF ^(AST_ANNOTATIONS annotation*) $nodeName projectionOperator featurePath?)
nodeDef
    : annotation* nodeName=JAVA_IDENTIFIER projectionOperator
      ( featurePath '.'?
      | '.'
      )
    ;
// Use of an annotation: an '@name' token, optionally followed by parenthesised arguments.
// Former tree rewrite: ^(AST_ANNOTATION $annotationName $splnv*)
annotation
    : annotationName=LANGTAGORANNOTATION ('(' singleParamOrListOfNameParam ')')?
    ;
// Projection operator of a node definition. Alternatives, in order:
//   1. 'uri'
//   2. 'literal' with a datatype ('^^' iri)
//   3. 'literal' with an '@...' token (language tag)
//   4. plain 'literal'
//   5. a %...% replacement placeholder
// Each alternative may be followed by a list of converters.
// Former tree rewrites:
//   ^(AST_PROJECTION_OPERATOR 'uri' converters?)
//   ^(AST_PROJECTION_OPERATOR 'literal' converters? ^(AST_DATATYPE iri))
//   ^(AST_PROJECTION_OPERATOR 'literal' converters? ^(AST_LANG LANGTAGORANNOTATION))
//   ^(AST_PROJECTION_OPERATOR 'literal' converters?)
projectionOperator
    : type='uri' converters?
    | type='literal' '^^' iri converters?
    | type='literal' LANGTAGORANNOTATION converters?
    | type='literal' converters?
    | rep_plc=REP_PLC converters?
    ;
// Parenthesised, non-empty, comma-separated list of converters.
// Former tree rewrite: ^(AST_CONVERTERS individualConverter+)
converters
    : '(' individualConverter (',' individualConverter)* ')'
    ;
// A single converter, identified by an IRI or by a %...% replacement placeholder, with
// optional additional arguments.
// Former tree rewrite (IRI alternative): ^(AST_CONVERTER iri converterAdditionalArguments?)
individualConverter
    : iri converterAdditionalArguments?
    | rep_plc=REP_PLC converterAdditionalArguments?
    ;
// Parenthesised, possibly empty, comma-separated list of converter arguments.
// Former tree rewrite: ^(AST_CONVERTER_ADDITIONAL_ARGUMENTS converterArgumentExpression*)
converterAdditionalArguments
    : '(' (converterArgumentExpression (',' converterArgumentExpression)*)? ')'
    ;
// One converter argument: a literal, an IRI, a '$' placeholder or a map.
converterArgumentExpression
    : literal
    | iri
    | converterPlaceholderArgument
    | converterMapArgument
    ;
// A '$name' token used as a converter argument.
// Former tree rewrite: ^(AST_CONVERTER_PLACEHOLDER_ARGUMENT VAR2)
converterPlaceholderArgument
    : VAR2
    ;
/*
converterLiteralArgument
:
converterStringLiteralArgument
;
converterStringLiteralArgument
:
string
//-> ^(AST_CONVERTER_STRING_LITERAL_ARGUMENT string)
;
*/
// Brace-delimited, possibly empty, comma-separated list of map entries.
// Former tree rewrite: ^(AST_CONVERTER_MAP_ARGUMENT mapEntry*)
converterMapArgument
    : '{' (mapEntry (',' mapEntry)*)? '}'
    ;
// One  key = value  entry of a converter map; the value is a literal, an IRI or a '$'
// placeholder. The three alternatives are kept separate, one per value kind.
// Former tree rewrites:
//   ^(AST_MAP_ENTRY JAVA_IDENTIFIER literal)
//   ^(AST_MAP_ENTRY JAVA_IDENTIFIER iri)
//   ^(AST_MAP_ENTRY JAVA_IDENTIFIER converterPlaceholderArgument)
mapEntry
    : JAVA_IDENTIFIER '=' literal
    | JAVA_IDENTIFIER '=' iri
    | JAVA_IDENTIFIER '=' converterPlaceholderArgument
    ;
// A feature path: one or more path elements separated by '/', or a single %...% replacement
// placeholder standing for the whole path.
// Former tree rewrite (first alternative): ^(AST_FEATURE_PATH featurePathElement+)
featurePath
    : featurePathElement ('/' featurePathElement)*
    | rep_plc=REP_PLC
    ;
// One step of a feature path: an identifier, optionally followed by an integer in brackets.
// Former tree rewrite: ^(AST_FEATURE_PATH_ELEMENT JAVA_IDENTIFIER INTEGER?)
featurePathElement
    : JAVA_IDENTIFIER ('[' INTEGER ']')?
    ;
// The  graph = { ... }  clause.
// Former tree rewrite: ^(AST_GRAPH_CLAUSE graph)
graphClause
    : ('graph' | 'GRAPH') '=' graph
    ;

// The  insert = { ... }  clause.
// Former tree rewrite: ^(AST_INSERT_CLAUSE graph)
insertClause
    : ('insert' | 'INSERT') '=' graph
    ;

// The  delete = { ... }  clause.
// Former tree rewrite: ^(AST_DELETE_CLAUSE graph)
deleteClause
    : ('delete' | 'DELETE') '=' graph
    ;
// One or more identifiers separated by '.'.
// Former tree rewrite: ^(AST_JAVA_QUALIFIED_NAMED JAVA_IDENTIFIER+)
javaQualifiedName
    : JAVA_IDENTIFIER ('.' JAVA_IDENTIFIER)*
    ;
// Brace-delimited graph pattern containing at least one graph element.
// Former tree rewrite: ^(AST_GRAPH graphElement+)
graph
    : '{' graphElement+ '}'
    ;
// One element of a graph: a triple (optionally preceded by annotations) or an OPTIONAL block.
// The choice is kept inside a single parenthesised block, as in the original definition.
graphElement
    : ( annotation* graphTriple
      | optionalGraphElement
      )
    ;
// An OPTIONAL block: the keyword and a brace-delimited, non-empty list of graph elements.
// Since it contains graph elements, OPTIONAL blocks can be nested.
// Former tree rewrite: ^(AST_OPTIONAL graphElement+)
optionalGraphElement
    : ('optional' | 'OPTIONAL') '{' graphElement+ '}'
    ;
// A triple: subject, predicate and object, terminated by '.'.
// Former tree rewrite: ^(AST_GRAPH_TRIPLE graphSubject graphPredicate graphObject)
graphTriple
    : graphSubject graphPredicate graphObject '.'
    ;
// Subject of a triple: variable, IRI, blank node, '$' placeholder or %...% replacement
// placeholder.
graphSubject
    : var
    | iri
    | blankNode
    | placeholder
    | rep_plc=REP_PLC
    ;
// Predicate of a triple: variable, IRI, the 'a' abbreviation, a property path, '$' placeholder
// or %...% replacement placeholder.
// A plain IRI and a plain 'a' are also derivable through propPath; iri and abbr are listed
// before propPath in this rule.
graphPredicate
    : var
    | iri
    | abbr
    | propPath
    | placeholder
    | rep_plc=REP_PLC
    ;
// Object of a triple: variable, IRI, literal, blank node, '$' placeholder or %...%
// replacement placeholder.
graphObject
    : var
    | iri
    | literal
    | blankNode
    | placeholder
    | rep_plc=REP_PLC
    ;
// New part about property paths. A property path should be used ONLY as the predicate of a triple in the WHERE
// section, not in the other graph sections (for example the GRAPH one); the grammar itself does not enforce this,
// so the parser should check it.
// Taken from https://www.w3.org/TR/sparql11-query/
/*
[88] Path ::= PathAlternative
[89] PathAlternative ::= PathSequence ( '|' PathSequence )*
[90] PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse )*
[91] PathElt ::= PathPrimary PathMod?
[92] PathEltOrInverse ::= PathElt | '^' PathElt
[93] PathMod ::= '?' | '*' | '+'
[94] PathPrimary ::= iri | 'a' | '!' PathNegatedPropertySet | '(' Path ')'
[95] PathNegatedPropertySet ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
[96] PathOneInPropertySet ::= iri | 'a' | '^' ( iri | 'a' )
*/
// Property path; corresponds to production [88] Path of the SPARQL grammar quoted above.
propPath
    : pathAlternative
    ;

// Alternatives of a path, separated by '|' ([89] PathAlternative).
pathAlternative
    : pathSequence ('|' pathSequence)*
    ;

// Sequence of path elements, separated by '/' ([90] PathSequence).
pathSequence
    : pathEltOrInverse ('/' pathEltOrInverse)*
    ;
// A path primary with an optional modifier ([91] PathElt). The modifier is the REGEX_SYMBOL
// token ('+', '?' or '*'), reused here in place of a dedicated path-modifier token.
pathElt
    : pathPrimary REGEX_SYMBOL?
    ;
// A path element, optionally inverted with a leading '^' ([92] PathEltOrInverse).
pathEltOrInverse
    : pathElt
    | '^' pathElt
    ;
// REGEX_SYMBOL is used in place of a dedicated PATHMOD token, since it already exists in the grammar
//PATHMOD : '?' | '*' | '+';
// Basic building block of a path ([94] PathPrimary): an IRI, the lower-case 'a' keyword,
// a negated property set or a parenthesised path.
pathPrimary
    : iri
    | 'a'
    | '!' pathNegatedPropertySet
    | '(' pathAlternative ')'
    ;
// Operand of '!' in a path ([95] PathNegatedPropertySet): one property, or a parenthesised
// list of properties separated by '|'.
// Difference from the SPARQL production: the parenthesised list must contain at least one
// property here, whereas SPARQL also allows an empty '(' ')'. The SPARQL form was:
//   '(' ( pathOneInPropertySet ( '|' pathOneInPropertySet )* )? ')'
pathNegatedPropertySet
    : pathOneInPropertySet
    | '(' pathOneInPropertySet ('|' pathOneInPropertySet)* ')'
    ;
// One member of a negated property set ([96] PathOneInPropertySet): an IRI or 'a', each
// optionally inverted with a leading '^'.
pathOneInPropertySet
    : iri
    | 'a'
    | '^' (iri | 'a')
    ;
// end new part about PropertyPath
// A '?name' variable.
// Former tree rewrite: ^(AST_VAR VAR1)
var
    : VAR1
    ;
// A '$name' placeholder, in one of three forms:
//   $name              plain
//   $name.identifier   single-dot form
//   $name..identifier  double-dot form
// The separator label records which of '.' / '..' was used.
// Former tree rewrites:
//   ^(AST_PLACE_HOLDER VAR2)
//   ^(AST_PLACE_HOLDER VAR2 JAVA_IDENTIFIER)
//   ^(AST_PLACE_HOLDER VAR2 VAR2 JAVA_IDENTIFIER)
placeholder
    : VAR2
    | VAR2 separator='.' JAVA_IDENTIFIER
    | VAR2 separator='..' JAVA_IDENTIFIER
    ;
// An IRI, written in full between angle brackets or as a prefixed name.
// Former tree rewrites: ^(AST_IRI_REF IRIREF)  /  prefixedName
iri
    : IRIREF
    | prefixedName
    ;
// A literal: a quoted string optionally followed by an '@...' token (language tag) or by a
// datatype ('^^' iri).
// Former tree rewrite: ^(AST_LITERAL string (LANGTAGORANNOTATION)? (iri)?)
literal
    : string (LANGTAGORANNOTATION | '^^' iri)?
    ;
// A quoted string, single-quoted (STRING_LITERAL1) or double-quoted (STRING_LITERAL2).
string
    : STRING_LITERAL1
    | STRING_LITERAL2
    ;
// A labelled blank node ('_:' followed by a local name).
// Former tree rewrite: ^(AST_BLANK_NODE BLANK_NODE_LABEL)
blankNode
    : BLANK_NODE_LABEL
    ;
// A prefixed name: prefix ':' local-part, or just the namespace part (prefix ':').
// Former tree rewrites: ^(AST_PREFIXED_NAME PNAME_LN)  /  ^(AST_PREFIXED_NAME PNAME_NS)
prefixedName
    : PNAME_LN
    | pn=PNAME_NS
    ;
// The 'a' predicate abbreviation; the upper-case 'A' is accepted as well.
// Former tree rewrite: ^(AST_ABBR 'a')
abbr
    : 'a'
    | 'A'
    ;
// Not perfect, because it also accepts the character '_', which is not allowed in a langtag.
//langtag
// :
// AT (JAVA_IDENTIFIER)+ ('-' (JAVA_IDENTIFIER)+)*
// ;
// The  where = { ... }  clause.
// Former tree rewrite: ^(AST_WHERE_CLAUSE graph)
whereClause
    : ('where' | 'WHERE') '=' graph
    ;
// part regarding the regex
// A regex definition: keyword, id, the pattern, '->' and a graph clause.
// Former tree rewrite: ^(AST_REGEX ^(AST_ID ruleId) regexPattern graphClause)
regex
    : ('regex' | 'REGEX') ruleId regexPattern '->' graphClause
    ;
// The pattern of a regex definition; wrapper around regexWithOr.
// Former tree rewrite: ^(AST_REGEX_PATTERN regexWithOr)
regexPattern
    : regexWithOr
    ;
// One or more sequences separated by '|'.
// Former tree rewrite: ^(AST_REGEX_OR $regexOr+)
regexWithOr
    : regexSequenceElement ('|' regexSequenceElement)*
    ;
// A non-empty sequence of base elements, each with its optional quantifier.
// Former tree rewrite: ^(AST_REGEX_SEQUENCE regexBaseElementWithSymbol+)
regexSequenceElement
    : regexBaseElementWithSymbol+
    ;
// A base element optionally followed by a quantifier ('+', '?' or '*').
// Former tree rewrite: ^(AST_REGEX_BASE ^(AST_REGEX_SYMBOL REGEX_SYMBOL?) regexBaseElement)
regexBaseElementWithSymbol
    : regexBaseElement REGEX_SYMBOL?
    ;
// A base element of a regex pattern, either:
//   [ maxDist? ruleId as internalId ]   a bracketed reference, with an optional leading integer
//   ( ... )                             a parenthesised sub-pattern
// Former tree rewrites: $regexRuleId $internalId INTEGER?  /  regexWithOr
regexBaseElement
    : '[' maxDist=INTEGER? regexRuleId=JAVA_IDENTIFIER ('as' | 'AS') internalId=JAVA_IDENTIFIER ']'
    | '(' regexWithOr ')'
    ;
// Quantifier token: exactly one of '+', '?' or '*'. Used by both the regex rules and the
// property-path rules.
REGEX_SYMBOL
    : [+?*]
    ;
// end new regex part
// Runs of spaces, tabs, form feeds and carriage returns are discarded.
WS
    : [ \t\f\r]+ -> skip
    ;

// Each line feed is discarded as a token of its own.
NEWLINE
    : '\n' -> skip
    ;
// Line comment: '//' up to, but not including, the end of the line; discarded.
// The line terminator is deliberately NOT part of the token, so a comment on the very last
// line of the input is skipped even when the file does not end with a newline (the previous
// definition required a closing '\n' or '\r' and failed to match in that case). The terminator
// that follows is then discarded by the whitespace rules, as before.
COMMENT
    : '//' ~[\r\n]* -> skip
    ;
// Block comment: from '/*' to the first following '*/' (non-greedy, so not nestable); discarded.
MULTILINE_COMMENT
    : '/*' .*? '*/' -> skip
    ;
// An IRI between angle brackets. Inside the brackets any character is allowed except
// < > " { } | ^ ` and the characters from U+0000 to U+0020 (controls and space).
IRIREF
    : '<'
      ( ~('<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\u0000'..'\u0020') )*
      '>'
    ;
// '?' immediately followed by a name; used by the 'var' rule.
VAR1
    : '?' VARNAME
    ;

// '$' immediately followed by a name; used by the placeholder rules.
VAR2
    : '$' VARNAME
    ;
// Namespace part of a prefixed name: an optional prefix followed by ':'.
// A lone ':' (empty prefix) is therefore a valid PNAME_NS.
PNAME_NS
    : PN_PREFIX? ':'
    ;
// Full prefixed name: namespace part immediately followed by a local part.
PNAME_LN
    : PNAME_NS PN_LOCAL
    ;
// Blank node label: '_:' immediately followed by a local name.
// A commented-out earlier form was:
//   '_:' PN_LOCAL( PN_CHARS_U | '0'..'9' ) ((PN_CHARS|'.')* PN_CHARS)?
BLANK_NODE_LABEL
    : '_:' PN_LOCAL
    ;
/*LANGTAG // old version
:
AT ('a'..'z'|'A'..'Z')+ ('-' ('a'..'z'|'A'..'Z'|'0'..'9')+)*
;*/
// Token shared by language tags and annotation names: '@', an identifier, then any number of
// '-'-separated subtags.
//
// Subtags after a '-' may start with (or consist only of) digits, so that language tags such
// as @de-1996 or @es-419 are recognised. The first part must still be an identifier, i.e. it
// cannot start with a digit. Everything the previous definition accepted is still accepted;
// the previous definition required every subtag to be a full identifier and therefore
// rejected digit-initial subtags.
// Like JAVA_IDENTIFIER, the parts may contain '_', which is not allowed in a real language tag.
LANGTAGORANNOTATION
    : AT JAVA_IDENTIFIER ('-' JAVA_LETTER_OR_DIGIT+)*
    ;
// The '@' character. It is not declared as a fragment, so an '@' that is not followed by an
// identifier is emitted as a token of its own.
AT
    : '@'
    ;
// Single-quoted string. The content is any mix of escape sequences (ECHAR) and characters
// other than ' (U+0027), \ (U+005C), line feed (U+000A) and carriage return (U+000D).
STRING_LITERAL1
    : '\''
      ( (~('\u0027'|'\u005C'|'\u000A'|'\u000D')) | ECHAR )*
      '\''
    ;

// Double-quoted string. The content is any mix of escape sequences (ECHAR) and characters
// other than " (U+0022), \ (U+005C), line feed (U+000A) and carriage return (U+000D).
STRING_LITERAL2
    : '"'
      ( (~('\u0022'|'\u005C'|'\u000A'|'\u000D')) | ECHAR )*
      '"'
    ;
// Operator of a condition: IN or NOT IN, all upper-case or all lower-case.
// The two-word forms contain exactly one space character between the words.
CONDITIONOPERATOR
    : 'IN'
    | 'in'
    | 'NOT IN'
    | 'not in'
    ;
// Replacement placeholder: one or more letters/underscores between two '%' (no digits).
REP_PLC
    : '%' JAVA_LETTER+ '%'
    ;
// Escape sequence inside a string: a backslash followed by one of  t b n r f \ ' "
fragment
ECHAR
    : '\\' [tbnrf\\'"]
    ;
// Local part of a prefixed name.
//   first character : PN_CHARS_U, ':', a digit, or a PLX escape
//   middle          : any of PN_CHARS, '.', ':' or PLX
//   last character  : as the middle ones, but not '.'
fragment
PN_LOCAL
    : (PN_CHARS_U | ':' | '0'..'9' | PLX )
      ( (PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
    ;
// Escaped character inside a local name: percent-encoding or backslash escape.
fragment
PLX
    : PERCENT
    | PN_LOCAL_ESC
    ;

// Percent-encoded character: '%' followed by two hexadecimal digits.
fragment
PERCENT
    : '%' HEX HEX
    ;

// One hexadecimal digit, upper- or lower-case.
fragment
HEX
    : [0-9A-Fa-f]
    ;
// Backslash escape inside a local name: '\' followed by one of
//   _ ~ . - ! $ & ' ( ) * + , ; = / ? # @ %
fragment
PN_LOCAL_ESC
    : '\\'
      ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')'
      | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%'
      )
    ;
// Prefix of a prefixed name: a base character followed by any number of PN_CHARS.
// The '.' alternative inside the prefix (originally  (PN_CHARS|'.')* ) has been removed,
// since it causes a bug in the generated lexer.
fragment
PN_PREFIX
    : PN_CHARS_BASE (PN_CHARS* PN_CHARS)?
    ;
// Base set of name characters: ASCII letters plus the listed Unicode ranges
// (including the supplementary range U+10000..U+EFFFF).
fragment
PN_CHARS_BASE
    : 'A'..'Z'
    | 'a'..'z'
    | '\u00C0'..'\u00D6'
    | '\u00D8'..'\u00F6'
    | '\u00F8'..'\u02FF'
    | '\u0370'..'\u037D'
    | '\u037F'..'\u1FFF'
    | '\u200C'..'\u200D'
    | '\u2070'..'\u218F'
    | '\u2C00'..'\u2FEF'
    | '\u3001'..'\uD7FF'
    | '\uF900'..'\uFDCF'
    | '\uFDF0'..'\uFFFD'
    | '\u{10000}'..'\u{EFFFF}'
    ;
// Base characters plus the underscore.
fragment
PN_CHARS_U
    : PN_CHARS_BASE
    | '_'
    ;

// Characters allowed after the first one in a name: PN_CHARS_U, '-', digits, U+00B7 and the
// ranges U+0300..U+036F and U+203F..U+2040.
fragment
PN_CHARS
    : PN_CHARS_U
    | '-'
    | '0'..'9'
    | '\u00B7'
    | '\u0300'..'\u036F'
    | '\u203F'..'\u2040'
    ;
// Name part of a '?' variable or '$' placeholder: same shape as JAVA_IDENTIFIER.
// A commented-out alternative definition was:
//   ( PN_CHARS_U | '0'..'9') ( PN_CHARS_U | '0'..'9' | '\u00B7' | '\u0300'..'\u036F' | '\u203F'..'\u2040')*
fragment
VARNAME
    : JAVA_IDENTIFIER
    ;
// Identifier: a letter or underscore followed by any number of letters, underscores and digits
// (ASCII only).
JAVA_IDENTIFIER
    : JAVA_LETTER JAVA_LETTER_OR_DIGIT*
    ;
// ASCII letter or underscore.
fragment
JAVA_LETTER
    : [a-zA-Z_]
    ;

// ASCII letter, underscore or decimal digit.
fragment
JAVA_LETTER_OR_DIGIT
    : JAVA_LETTER
    | [0-9]
    ;
// Unsigned integer: one or more decimal digits.
INTEGER
    : [0-9]+
    ;

// Unsigned decimal number: digits, a dot, digits; both digit groups are mandatory.
// A commented-out earlier form was:  '0.' INTEGER
DOUBLE
    : INTEGER '.' INTEGER
    ;