edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser.jj

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, and whether they are names of companies, people, etc.; normalize dates, times, and numeric quantities; mark up the structure of sentences in terms of phrases and word dependencies; and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
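
The grammar below defines the TokensRegex pattern and rule language. For orientation, here is a minimal sketch (not part of the .jj file) of how that language is typically exercised through the public TokensRegex API; the class name, annotator list, sample sentence, and pattern string are illustrative assumptions rather than anything prescribed by this file.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.util.List;
import java.util.Properties;

public class TokensRegexQuickStart {
  public static void main(String[] args) {
    // Build a small pipeline: tokenize, split sentences, and tag parts of speech.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("The startup was bought for 5 million dollars.");
    pipeline.annotate(doc);

    // A token-sequence pattern written in the language defined by the grammar below:
    // a CD-tagged token, an optional magnitude word, then "dollar(s)".
    TokenSequencePattern pattern =
        TokenSequencePattern.compile("[{tag:\"CD\"}] /million|billion/? /dollars?/");

    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
      while (matcher.find()) {
        System.out.println(matcher.group());   // e.g. "5 million dollars"
      }
    }
  }
}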

options {
    UNICODE_INPUT = true;
    // Option for multiple instances
    STATIC = false;
    // Private classes
    SUPPORT_CLASS_VISIBILITY_PUBLIC = false;
}

PARSER_BEGIN(TokenSequenceParser)
package edu.stanford.nlp.ling.tokensregex.parser;
// all generated classes are in this package

//imports
import edu.stanford.nlp.ling.tokensregex.types.*;
import edu.stanford.nlp.ling.tokensregex.*;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.ArrayMap;
import edu.stanford.nlp.util.Pair;
import java.io.Reader;
import java.io.StringReader;
import java.util.*;
import java.lang.RuntimeException;

public class TokenSequenceParser implements SequencePattern.Parser {
    public TokenSequenceParser() {}

    /** Parses a list of rules from r (using env for variable bindings) and returns a CoreMapExpressionExtractor built over them. */
    public CoreMapExpressionExtractor getExpressionExtractor(Env env, Reader r) throws ParseException, TokenSequenceParseException {
        try{
            TokenSequenceParser p = new TokenSequenceParser(r);
            List rules = p.RuleList(env);
            return new CoreMapExpressionExtractor(env, rules);
        }catch(TokenMgrError error){
            throw new TokenSequenceParseException("Parsing failed. Error: " + error);
        }
    }

    /** Parses additional rules from r and appends them to the given extractor, reusing that extractor's Env. */
    public void updateExpressionExtractor(CoreMapExpressionExtractor extractor, Reader r) throws ParseException, TokenSequenceParseException {
        try{
            TokenSequenceParser p = new TokenSequenceParser(r);
            List rules = p.RuleList(extractor.getEnv());
            extractor.appendRules(rules);
        }catch(TokenMgrError error){
            throw new TokenSequenceParseException("Parsing failed. Error: " + error);
        }
    }

	/** Parses s as a token-sequence regular expression and returns the resulting pattern expression. */
	public SequencePattern.PatternExpr parseSequence(Env env, String s) throws ParseException, TokenSequenceParseException {
        try{
            TokenSequenceParser p = new TokenSequenceParser(new StringReader(s));
            return p.SeqRegex(env);
        }catch(TokenMgrError error){
            throw new TokenSequenceParseException("Parsing failed. Error: " + error);
        }
  	}

	/** Parses s as a token-sequence regular expression, optionally followed by an "==>" action, and returns the pair. */
	public Pair<SequencePattern.PatternExpr, SequenceMatchAction<CoreMap>> parseSequenceWithAction(Env env, String s) throws ParseException, TokenSequenceParseException {
        try{
            TokenSequenceParser p = new TokenSequenceParser(new StringReader(s));
            return p.SeqRegexWithAction(env);
        }catch(TokenMgrError error){
            throw new TokenSequenceParseException("Parsing failed. Error: " + error);
        }
  	}

	/** Parses s as a single node pattern and returns it wrapped as a NodePatternExpr. */
	public SequencePattern.PatternExpr parseNode(Env env, String s) throws ParseException, TokenSequenceParseException {
        try{
            TokenSequenceParser p = new TokenSequenceParser(new StringReader(s));
            NodePattern n = p.Node(env);
            return new SequencePattern.NodePatternExpr(n);
        }catch(TokenMgrError error){
            throw new TokenSequenceParseException("Parsing failed. Error: " + error);
        }
  	}

    /** Parses an integer literal, tolerating an explicit leading '+'. */
    private static Integer parseInteger(String str) {
      if (str.startsWith("+")) {
        return Integer.valueOf(str.substring(1));
      } else {
        return Integer.valueOf(str);
      }
    }

    /** Parses a long literal, stripping a trailing 'L' and an explicit leading '+'. */
    private static Long parseLongInteger(String str) {
      if (str.endsWith("L")) {
        str = str.substring(0, str.length()-1);
      }
      if (str.startsWith("+")) {
        return Long.valueOf(str.substring(1));
      } else {
        return Long.valueOf(str);
      }
    }

    private String parseQuotedString(String str) {
      // todo [cdm 2014]: I suspect this doesn't work because of how JavaCC escapes \
      // Trim start/end quote and unescape \"
      return str.substring(1,str.length()-1).replaceAll("\\\\\"", "\"");
    }

    /** Appends the images of the chain of special (skipped) tokens ending at st, earliest first. */
    private void appendSpecialTokens( StringBuilder sb, Token st ) {
      if( st != null ) {
        appendSpecialTokens( sb, st.specialToken );
        sb.append( st.image );
      }
    }

    /** Reconstructs the source text spanned by tokens from head (inclusive) to tail (exclusive), optionally including skipped whitespace and comments. */
    private String getStringFromTokens(Token head, Token tail, boolean includeSpecial) {
      StringBuilder sb = new StringBuilder();
      for( Token p = head ; p != tail ; p = p.next ) {
        if (includeSpecial) {
          appendSpecialTokens( sb, p.specialToken );
        }
        sb.append( p.image );
      }
      return sb.toString();
    }
}

PARSER_END(TokenSequenceParser)

SKIP:
{
	" "
|	"\r"
|	"\n"
|	"\t"
|   < "//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
|   < "#" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
}

TOKEN:
{
	< IDENTIFIER: ( ["A"-"Z","a"-"z", "_"] ) (["A"-"Z","a"-"z","0"-"9", "_"] )* >
|	< REGEXVAR: "$" ( ["A"-"Z","a"-"z", "_"] ) (["A"-"Z","a"-"z","0"-"9", "_"] )* >
|	< REGEXGROUP: "$" (["0"-"9"])+ >
|	< REGEXMRVAR: "$$" ( ["A"-"Z","a"-"z", "_"] ) (["A"-"Z","a"-"z","0"-"9", "_"] )* >
|	< REGEXMRGROUP: "$$" (["0"-"9"])+ >
|   < BACKREF: "\\" (["0"-"9"])+ >
|   < NONNEGINT: (["0"-"9"])+ >
|   < INT: (["+","-"])? (["0"-"9"])+ >
|   < LONGINT: (["+","-"])? (["0"-"9"])+ "L" >
|   < REAL: (["+","-"])? (["0"-"9","E","-"])+ "." (["0"-"9","E","-"])+ >
|	< REGEX: "/" ("\\/"|~["\n","\r","/"])* "/" >
|	< STR: "\"" ("\\\""|~["\n","\r", "\""])* "\"" >
|   < NUMCMP: "<=" | ">=" | ">" | "<" | "==" | "!=" >
|   < STRREGEXCMP: "=~" | "!=" >
|   < STRSIMPLE: (["A"-"Z","a"-"z","0"-"9", "_"] )+ >
}


List RuleList(Env env) : {
  List rules = new ArrayList();
  SequenceMatchRules.Rule rule;
} {
    ( rule = Rule(env) { rules.add(rule); }
    )*
    { return rules; }
}

SequenceMatchRules.Rule Rule(Env env) : {
  SequenceMatchRules.Rule rule;
  Expressions.CompositeValue cv;
} {
  ( LOOKAHEAD(3)
    rule = ExpressionExtractorRule(env)
    |
    LOOKAHEAD(3)
    rule = AssignmentRule(env)
    |
    LOOKAHEAD(3)
    cv = CompositeFieldValue(env) { rule = SequenceMatchRules.createRule(env, cv); } )
  { return rule; }
}

SequenceMatchRules.Rule ExpressionExtractorRule(Env env) : {
  SequencePattern.PatternExpr expr;
  String stringRegex;
  Expression result;
  Token ruleTypeToken;
} {
    LOOKAHEAD(2)
  ( "{" stringRegex = StringRegex(env) "=>" result = Expression(env) "}" )
  { return SequenceMatchRules.createExtractionRule(env, null, stringRegex, result); }
  |
    LOOKAHEAD(2)
  ( "{" "(" expr = SeqRegex(env) ")" "=>" result = Expression(env) "}" )
  { return SequenceMatchRules.createExtractionRule(env, null, TokenSequencePattern.compile(expr), result); }
  |
    LOOKAHEAD(2)
  ( "{" "tokens:" "(" expr = SeqRegex(env) ")" "=>" result = Expression(env) "}" )
  { return SequenceMatchRules.createTokenPatternRule(env, expr, result); }
  |
    LOOKAHEAD(2)
  ( "{" "text:" stringRegex = StringRegex(env) "=>" result = Expression(env) "}" )
  { return SequenceMatchRules.createTextPatternRule(env, stringRegex, result); }
}

SequenceMatchRules.Rule AssignmentRule(Env env) : {
  AssignableExpression var;
  Expression result;
} {
  ( var = AssignableExpression(env)  "=" result = Expression(env) (";")? )
  { return SequenceMatchRules.createAssignmentRule(env, var, result);
  }
}

AssignableExpression AssignableExpression(Env env) : {
  AssignableExpression expr;
} {
  expr = AssignableNestedVarExpression(env)
  { return expr; }
}

Expression Expression(Env env) : {
  Expression expr;
} {
  (
    LOOKAHEAD(5)
    expr = NestedFunctionCallExpression(env) |
    LOOKAHEAD(5)
    expr = NestedVarExpression(env) |
    LOOKAHEAD(5)
    expr = ValueExpression(env) |
    LOOKAHEAD(5)
    expr = ListExpression(env) |
    LOOKAHEAD(5)
    expr = ListExpression2(env) |
    LOOKAHEAD(5)
    expr = CaseExpression(env)
  )
  { return expr; }
}

int Index() : {
  Token t;
} {
  ( "[" t = IntegerToken() "]" )
  { return Integer.parseInt(t.image); }
}

Expression FunctionCallExpression(Env env) : {
  Token typeToken;
  Expression param;
  List params = new ArrayList();
} {
  (  typeToken = <IDENTIFIER>
   ( "("
     (
       param = Expression(env) { params.add(param); }
       ( "," param = Expression(env) { params.add(param); } )*
     )?
     ")"
   )
  )
  { return new Expressions.FunctionCallExpression(typeToken.image, params); }
}

Value ValueExpression(Env env) : {
  Value expr;
} {
  (
    expr = BasicValue(env)
    |
    expr = CompositeFieldValue(env)
  )
  { return expr; }
}

Expressions.CompositeValue CompositeFieldValue(Env env) : {
    Map attributes = new ArrayMap();
} {
	(
	    "{"
	        (
	            FieldValue(env, attributes)
	        )

			( ("," | ";")  FieldValue(env, attributes)
		    )*
		"}"
    )
  	{
  	  return new Expressions.CompositeValue(/*"COMPOSITE", */ attributes, false);
	}
}

Map FieldValue(Env env, Map attributes) : {
	String fieldname = null;
	Expression expr = null;
}   {
	    fieldname = RelaxedString()
	    (    ":" expr = Expression(env) )
	    {
	      if (fieldname != null && expr != null)  {
	        if (attributes.containsKey(fieldname)) {
                throw new Error("Field already defined: " + fieldname);
	        }
	        attributes.put(fieldname, expr);
	      }
     	  return attributes;
	    }
	}

Value BasicValue(Env env) : {
	Token tok = null;
	Token head = null;
	Token tail = null;
	SequencePattern.PatternExpr seqRegex = null;
}   {
        tok = <REGEX> { return new Expressions.RegexValue(/*"REGEX",*/ tok.image.substring(1,tok.image.length()-1)); }
        |
        tok = <STR> { return new Expressions.PrimitiveValue("STRING", parseQuotedString(tok.image) ); }
        |
        tok = IntegerToken() { return new Expressions.PrimitiveValue("INTEGER", parseInteger(tok.image)); }
        |
        tok = <LONGINT> { return new Expressions.PrimitiveValue("INTEGER", parseLongInteger(tok.image)); }
        |
        tok = <REAL> { return new Expressions.PrimitiveValue("REAL", Double.valueOf(tok.image)); }
        |
        head = "(" seqRegex = SeqRegex(env) tail = ")" {
          String str = getStringFromTokens(head, tail, true);
          TokenSequencePattern seqPattern = new TokenSequencePattern(str, seqRegex);
          return new Expressions.PrimitiveValue("TOKEN_REGEX", seqPattern);
        }
	}

AssignableExpression AssignableVar(Env env) : {
	Token tok = null;
}   {
        tok = <IDENTIFIER> { return new Expressions.VarExpression(tok.image); }
        |
        tok = <REGEXVAR> { return new Expressions.RegexMatchVarExpression(tok.image); }
	}

Expression VarOrRegexVar(Env env) : {
	Token tok = null;
}   {
        tok = <IDENTIFIER> { return new Expressions.VarExpression(tok.image); }
        |
        tok = <REGEXVAR> { return new Expressions.RegexMatchVarExpression(tok.image); }
        |
        tok = <REGEXGROUP> { return new Expressions.RegexMatchVarExpression(Integer.valueOf(tok.image.substring(1))); }
        |
        tok = <REGEXMRVAR> { return new Expressions.RegexMatchResultVarExpression(tok.image.substring(1)); }
        |
        tok = <REGEXMRGROUP> { return new Expressions.RegexMatchResultVarExpression(Integer.valueOf(tok.image.substring(2))); }
	}

Expression MethodCallExpression(Env env, Expression parent) : {
  Token typeToken;
  Expression param;
  List params = new ArrayList();
} {
  (  typeToken = <IDENTIFIER>
   ( "("
     (
       param = Expression(env) { params.add(param); }
       ( "," param = Expression(env) { params.add(param); } )*
     )?
     ")"
   )
  )
  { return new Expressions.MethodCallExpression(typeToken.image, parent, params); }
}

AssignableExpression AssignableNestedVarExpression(Env env) : {
  AssignableExpression expr;
  Expression fieldExpr;
  String s;
  Token tok;
  int i;
} {
  ( expr = AssignableVar(env)
   (
     LOOKAHEAD(2)
     i = Index()
    { expr = new Expressions.IndexedExpression(expr, i);   }
     |
     LOOKAHEAD(2)
     "["
     fieldExpr = Expression(env)
     { expr = new Expressions.FieldExpression(expr, fieldExpr); }
     "]"
     |
     LOOKAHEAD(2)
     "."
     s = RelaxedString()
     { expr = new Expressions.FieldExpression(expr, s); }
    )*
  )
  { return expr;   }
}

Expression NestedVarExpression(Env env) : {
  Expression expr;
  Expression fieldExpr;
  String s;
  Token tok;
  int i;
} {
  ( expr = VarOrRegexVar(env)
   (
     LOOKAHEAD(3)
     i = Index()
    { expr = new Expressions.IndexedExpression(expr, i);   }
     |
     LOOKAHEAD(3)
     "["
     fieldExpr = Expression(env)
     { expr = new Expressions.FieldExpression(expr, fieldExpr); }
     "]"
     |
     LOOKAHEAD(3)
     "."
     expr = MethodCallExpression(env, expr)
     |
     LOOKAHEAD(3)
     "."
     s = RelaxedString()
     { expr = new Expressions.FieldExpression(expr, s); }
    )*
  )
  { return expr;   }
}

Expression NestedFunctionCallExpression(Env env) : {
  Expression expr;
  Expression fieldExpr;
  String s;
  Token tok;
  int i;
} {
  ( expr = FunctionCallExpression(env)
   (
     LOOKAHEAD(3)
     i = Index()
    { expr = new Expressions.IndexedExpression(expr, i);   }
     |
     LOOKAHEAD(3)
     "["
     fieldExpr = Expression(env)
     { expr = new Expressions.FieldExpression(expr, fieldExpr); }
     "]"
     |
     LOOKAHEAD(3)
     "."
     expr = MethodCallExpression(env, expr)
     |
     LOOKAHEAD(3)
     "."
     s = RelaxedString()
     { expr = new Expressions.FieldExpression(expr, s); }
    )*
  )
  { return expr;   }
}

Expression ListExpression(Env env) : {
  List exprs = new ArrayList();
  Expression expr;
} {
  ( "("
     expr = Expression(env)
     { exprs.add(expr); }
   (
     ","
     expr = Expression(env)
     { exprs.add(expr); }
   )*
    ")"
  )
  { return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs); }
}

Expression ListExpression2(Env env) : {
  List exprs = new ArrayList();
  Expression expr;
} {
  ( "["
     expr = Expression(env)
     { exprs.add(expr); }
   (
     ","
     expr = Expression(env)
     { exprs.add(expr); }
   )*
    "]"
  )
  { return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs); }
}

Expression BasicCondExpression(Env env) : {
  Expression expr1 = null;
  Expression expr2 = null;
  Token op = null;
} {
  LOOKAHEAD(3)
  (
    expr1 = NestedVarExpression(env)
    ( op = CmpToken()
      expr2 = Expression(env) )?
  )
  { if (op == null) {
      return new Expressions.ConditionalExpression(expr1);
    } else {
      return new Expressions.ConditionalExpression(op.image, expr1, expr2);
    }
  }
  |
  LOOKAHEAD(3)
    expr1 = NestedFunctionCallExpression(env)
    { return new Expressions.ConditionalExpression(expr1); }
}

Expression CondGroup(Env env) : {
  Expression expr;
} {
  (
    ( expr = BasicCondExpression(env) )
    |
    (
      "(" expr = CondExpression(env)  ")"
    )
  )
  { return expr; }
}

Expression CondExpression(Env env) : {
  Expression child;
  List disjChildren = new ArrayList();
  List conjChildren = new ArrayList();
  Token op;
} {
    ( child = CondGroup(env)  { conjChildren.add(child); }  )
	(  ( op = "&&"| op = "||" )
	   child = CondGroup(env)
	  {
	     if ("&&".equals(op.image)) {
	      conjChildren.add(child);
	     } else if ("||".equals(op.image)) {
	       if (conjChildren.size() > 1) {
	          disjChildren.add(new Expressions.AndExpression(conjChildren));
	       } else {
	          disjChildren.add(conjChildren.get(0));
	       }
	       conjChildren = new ArrayList();
	       conjChildren.add(child);
	     }
	   }
	)*
 	{
	  if (conjChildren.size() > 1) {
	     disjChildren.add(new Expressions.AndExpression(conjChildren));
	  } else {
	     disjChildren.add(conjChildren.get(0));
	  }
 	  if (disjChildren.size() == 1)
		return disjChildren.get(0);
	  else
		return new Expressions.OrExpression(disjChildren);
    }
}
Expression CaseExpression(Env env) : {
  Expression cond = null;
  Expression expr = null;
  List<Pair<Expression,Expression>> cases
    = new ArrayList<Pair<Expression,Expression>>();
  Expression elseExpr = null;
} {
   (
    ":case" "{"
        ( cond = CondExpression(env) "=>"  expr = Expression(env)
          { cases.add(new Pair(cond, expr)); }
          ("," | ";")
        )+
        ( ":else" "=>" elseExpr = Expression(env))?
    "}"
   )
   { return new Expressions.CaseExpression(cases, elseExpr); }
}

String StringRegex(Env env) : {
  Token tok;
} {
    ( tok = <REGEX> )  { return tok.image.substring(1,tok.image.length()-1); }
}


SequencePattern.PatternExpr SeqRegex(Env env) : {
  SequencePattern.PatternExpr expr;
  boolean hasStart = false;
  boolean hasEnd = false;
} {
   (
    ( "^" { hasStart = true; } )?
    ( expr = SeqRegexDisjConj(env) )
    ( "$" { hasEnd = true; } )?
   )
   {
     if (hasStart) {
       expr = new SequencePattern.SequencePatternExpr(SequencePattern.SEQ_BEGIN_PATTERN_EXPR, expr);
     }
     if (hasEnd) {
       expr = new SequencePattern.SequencePatternExpr(expr, SequencePattern.SEQ_END_PATTERN_EXPR);
     }
     return expr;
   }
}


Object StringNumberValue(Env env) : {
	Token tok = null;
}   {
        tok = <STR> { return parseQuotedString(tok.image); }
        |
        tok = IntegerToken() { return parseInteger(tok.image); }
        |
        tok = <LONGINT> { return parseLongInteger(tok.image); }
        |
        tok = <REAL> { return Double.valueOf(tok.image); }
	}

SequencePattern.PatternExpr SeqRegexBasic(Env env) : {
  List children = new ArrayList();
  NodePattern node;
  MultiNodePattern multiNode;
  SequencePattern.PatternExpr expr;
  Object value = null;
} {
    (
     (
      ( ( node = BracketedNode(env) ) { expr = new SequencePattern.NodePatternExpr(node);  }
         |
        ( node = CoreMapWordPattern(env) ) { expr = new SequencePattern.NodePatternExpr(node);  }
         |
        ( multiNode = MultiNodePattern(env) ) { expr = new SequencePattern.MultiNodePatternExpr(multiNode);  }
         |
        ( expr = SeqRegexGroup(env)  )
         |
        ( expr = SeqVar(env) )
         |
        ( expr = SeqBackRef(env) )
      )
      ( expr = SeqRegexRepeatTimes(env, expr) )?
       {
         children.add(expr);
       }
     )+
// 	 ( "=>" value = StringNumberValue() )?
 	 ( "=>" value = Expression(env) )?
 	)
 	{ if (children.size() != 1) {
 	    expr = new SequencePattern.SequencePatternExpr(children);
 	  }
 	  if (value != null) {
 	    expr = new SequencePattern.ValuePatternExpr(expr, value);
 	  }
 	  return expr;
 	}
}

SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePattern.PatternExpr expr) : {
   Token value;
   Token v2;
   int min = -1;
   int max = -1;
   boolean greedy = true;
}  {
  (  (
       ( value = "*" )  { min = 0; max = -1;  }
       |
       ( value = "?" )  { min = 0; max = 1; }
       |
       ( value = "+" )  { min = 1; max = -1;  }
       |
    LOOKAHEAD(3)
       ( "{"  value =  "}" ) { min = Integer.parseInt(value.image); max = min;  }
       |
    LOOKAHEAD(4)
       ( "{"  value =  "," "}" ) { min = Integer.parseInt(value.image); max = -1;  }
       |
       ( "{"  value =   "," v2 =  "}" ) { min = Integer.parseInt(value.image); max = Integer.parseInt(v2.image);  }
   )
   (  "?" {  greedy = false; } )?
 )
   {
      return new SequencePattern.RepeatPatternExpr(expr, min, max, greedy);
   }
}


SequencePattern.PatternExpr SeqRegexDisj(Env env) : {
  List children = new ArrayList();
  SequencePattern.PatternExpr expr;
} {
    ( expr = SeqRegexBasic(env) ) { children.add(expr); }
    (
      ("|"|"||")  expr = SeqRegexBasic(env)  {  children.add(expr);  }
    )*
 	{
 	    if (children.size() == 1) { return children.get(0); }
		else { return new SequencePattern.OrPatternExpr(children);  }
    }
}

SequencePattern.PatternExpr SeqRegexDisjConj(Env env) : {
	SequencePattern.PatternExpr child;
	List disjChildren = new ArrayList();
	List conjChildren = new ArrayList();
	Token op;
} {
    ( child = SeqRegexBasic(env)  { conjChildren.add(child); }  )
	(  ( op = "&"| op = "|" | op = "&&"| op = "||" )
	   child = SeqRegexBasic(env)
	  {
	     if ("&".equals(op.image) || "&&".equals(op.image)) {
	      conjChildren.add(child);
	     } else if ("|".equals(op.image) || "||".equals(op.image)) {
	       if (conjChildren.size() > 1) {
	          disjChildren.add(new SequencePattern.AndPatternExpr(conjChildren));
	       } else {
	          disjChildren.add(conjChildren.get(0));
	       }
	       conjChildren = new ArrayList();
	       conjChildren.add(child);
	     }
	   }
	)*
 	{
	  if (conjChildren.size() > 1) {
	     disjChildren.add(new SequencePattern.AndPatternExpr(conjChildren));
	  } else {
	     disjChildren.add(conjChildren.get(0));
	  }
 	  if (disjChildren.size() == 1)
		return disjChildren.get(0);
	  else
		return new SequencePattern.OrPatternExpr(disjChildren);
    }
}

SequencePattern.PatternExpr SeqRegexGroup(Env env) : {
  SequencePattern.PatternExpr expr;
  boolean capturing = true;
  String varname = null;
  Token var;
} {
    (  "("
      ( ( "?:" {capturing = false; } )
        |
        ( "?" var =  {varname = var.image; } )
      )?
       ( (expr = SeqRegex(env) )  )
       ")"
     )

 	{
		if (varname != null) {
		   return new SequencePattern.GroupPatternExpr(expr, varname);
		} else {
		   return new SequencePattern.GroupPatternExpr(expr, capturing);
        }
    }
}

NodePattern BracketedNode(Env env) : {
  NodePattern node;
} {
  (
    LOOKAHEAD(2)
    "[" "]" { node = NodePattern.ANY_NODE; }
    |
    "["
    (  node = NodeDisjConj(env)  )
    "]"
  )
  { return node; }

}

SequencePattern.PatternExpr SeqVar(Env env) : {
  SequencePattern.PatternExpr expr;
  Token name;
} {
  (
    name = <REGEXVAR>
    {
        expr = env.getSequencePatternExpr(name.image, true);
        if (expr == null) {
            throw new Error("Unknown sequence pattern variable " + name.image);
        }
    }
  )
  { return expr; }

}

SequencePattern.PatternExpr SeqBackRef(Env env) : {
  Token name;
} {
    name = <BACKREF>
    {
        int v = Integer.parseInt(name.image.substring(1));
        return new SequencePattern.BackRefPatternExpr(CoreMapNodePattern.TEXT_ATTR_EQUAL_CHECKER, v);
    }
}


NodePattern Node(Env env) : {
  NodePattern node;
} {
  (
    ( node = BracketedNode(env) )
    |
    (  node = NodeGroup(env)  )
  )
  { return node; }
}

NodePattern NodeDisj(Env env) : {
	List children = new ArrayList();
	NodePattern child;
} {
    (  child = NodeGroup(env)  { children.add(child); }  )
	( ("|"|"||") child = NodeGroup(env)  { children.add(child); } )*
 	{ if (children.size() == 1)
		return child;
	  else
		return new NodePattern.DisjNodePattern(children);
    }
}

NodePattern NodeConj(Env env) : {
	NodePattern child;
	List children = new ArrayList();
} {
    ( child = NodeGroup(env)  { children.add(child); }  )
	( ("&"|"&&") child = NodeGroup(env)  { children.add(child); } )*
 	{ if (children.size() == 1)
		return child;
	  else
		return new NodePattern.ConjNodePattern(children);
    }
}

NodePattern NodeDisjConj(Env env) : {
	NodePattern child;
	List disjChildren = new ArrayList();
	List conjChildren = new ArrayList();
	Token op;
} {
    ( child = NodeGroup(env)  { conjChildren.add(child); }  )
	(  ( op = "&" | op = "|" | op = "&&" | op = "||" )
	   child = NodeGroup(env)
	  {
	     if ("&".equals(op.image) || "&&".equals(op.image)) {
	      conjChildren.add(child);
	     } else if ("|".equals(op.image) || ("||".equals(op.image))) {
	       if (conjChildren.size() > 1) {
	          disjChildren.add(new NodePattern.ConjNodePattern(conjChildren));
	       } else {
	          disjChildren.add(conjChildren.get(0));
	       }
	       conjChildren = new ArrayList();
	       conjChildren.add(child);
	     }
	   }
	)*
 	{
	  if (conjChildren.size() > 1) {
	     disjChildren.add(new NodePattern.ConjNodePattern(conjChildren));
	  } else {
	     disjChildren.add(conjChildren.get(0));
	  }
 	  if (disjChildren.size() == 1)
		return disjChildren.get(0);
	  else
		return new NodePattern.DisjNodePattern(disjChildren);
    }
}

NodePattern NodeGroup(Env env) : {
  NodePattern node;
} {
  (
    LOOKAHEAD(2)
    ( node = NodeBasic(env) )
    |
    LOOKAHEAD(2)
    (
      "(" node = NodeDisjConj(env)  ")"
    )
    |
    LOOKAHEAD(2)
    (
      "!" "(" node = NodeDisjConj(env)  ")"
      { node = new NodePattern.NegateNodePattern(node); }
    )
  )
  { return node; }
}

NodePattern NodeBasic(Env env) : {
	NodePattern child;
} {
   ( "!" child = CoreMapNode(env)
 	{ return new NodePattern.NegateNodePattern(child); } )
 	|
   (  child = CoreMapNode(env)
 	{ return child;  }    )
}

NodePattern CoreMapNode(Env env) : {
    Map attributes = new ArrayMap();
    NodePattern pat;
	Token value = null;
} {
	(
	    "{"
	        (
	            AttrValue(env, attributes)
	        )

			( ("," | ";")  AttrValue(env, attributes)
		    )*
		"}"
		    |  LOOKAHEAD(2)
                AttrValue(env, attributes)
		    | ( value = RelaxedStringToken() | value = <REGEX> )
		      { attributes.put("word", value.image); }
    )
  	{ pat = CoreMapNodePattern.valueOf(env, attributes);
	  return pat;
	}
	|
	pat = CoreMapVarNodePattern(env)
	{
	    return pat;
	}
	|
	( "{{" pat = CoreMapExprNodePattern(env) "}}" )
	{
	    return pat;
	}
}

Map AttrValue(Env env, Map attributes) : {
	Token attr = null;
	Token value = null;
	Token tok = null;
	String str = null;
}   {
	    attr = <IDENTIFIER>
	    (    ":" (value = <STR> | value = <REGEX> | value = <IDENTIFIER> | str = CoreMapVarValue(env) )
	        | tok = "::"
	            value = <IDENTIFIER>
	        | tok = <NUMCMP>
	            ( value = NumberToken() | str = CoreMapVarValue(env) )
	    )
	    { if (value != null) { str = value.image; }
	      if (tok != null) { str = tok.image + str; }
	      if (attr != null && str != null)  {
	        if (attributes.containsKey(attr.image)) {
                throw new Error("Attribute match already defined: " + attr.image);
	        }
	        attributes.put(attr.image, str);
	      }
     	  return attributes;
	    }
	}



NodePattern CoreMapWordPattern(Env env) : {
    Map attributes = new ArrayMap();
    CoreMapNodePattern pat;
	Token value = null;
} {
	(
        ( value = <STR> | value = <REGEX>  | value = <STRSIMPLE>
           | value = <IDENTIFIER> | value = <NONNEGINT> | value = <INT>
           | value = <LONGINT> | value = <REAL> )
		{ attributes.put("word", value.image); }
    )
  	{ pat = CoreMapNodePattern.valueOf(env, attributes);
	  return pat;
	}
}

MultiNodePattern MultiNodePattern(Env env) : {
    NodePattern pat;
    MultiNodePattern mp;
    Token v1, v2;
    int min = 1, max = -1;
    boolean greedy = true;
} {
    "(?m)"
    (
    LOOKAHEAD(3)
     "{" v1 =  "}"
     { min = Integer.parseInt(v1.image);
       max = Integer.parseInt(v1.image); }
      |
    LOOKAHEAD(4)
     "{" v1 =  "," "}"
     { min = Integer.parseInt(v1.image); }
      |
     "{" v1 =  "," v2 =  "}"
     { min = Integer.parseInt(v1.image);
       max = Integer.parseInt(v2.image); }
    )?
    (  "?" {  greedy = false; } )?
     pat = CoreMapWordPattern(env)
    {
      mp = new MultiCoreMapNodePattern(pat);
      mp.setMinNodes(min);
      mp.setMaxNodes(max);
      mp.setGreedyMatch(greedy);
      return mp;
    }
}

String CoreMapVarValue(Env env) : {
	Token value = null;
}  {
	( value = <REGEXVAR> ) { return (String) env.get(value.image); }
}

NodePattern CoreMapVarNodePattern(Env env) : {
	Token value = null;
}  {
	( value = <REGEXVAR> ) { return env.getNodePattern(value.image); }
}

NodePattern CoreMapExprNodePattern(Env env) : {
	Expression expr = null;
}  {
	( expr = CondExpression(env) ) { return new CoreMapExpressionNodePattern(env, expr); }
}


Pair<SequencePattern.PatternExpr, SequenceMatchAction<CoreMap>> SeqRegexWithAction(Env env) : {
  SequencePattern.PatternExpr expr;
  SequenceMatchAction action = null;
} {
    ( expr = SeqRegex(env) )
    ( action = Action(env) )?
    { return new Pair<SequencePattern.PatternExpr, SequenceMatchAction<CoreMap>>(expr,action); }
}

SequenceMatchAction Action(Env env) : {
   SequenceMatchAction action;
} {
  "==>"
  (
    action = AnnotateAction(env)
  )
  { return action; }
}

SequenceMatchAction AnnotateAction(Env env) : {
  Map attributes;
//  int group;
} {
  "&annotate" "("
//  group =  ","
  attributes = SetAttrValues(env)
  ")"
  { return new CoreMapSequenceMatchAction.AnnotateAction( /*group,*/ attributes);  }
}

Map SetAttrValues(Env env) : {
    Map attributes = new ArrayMap();
} {
	(
	    "{"
	        (
	            SetAttrValue(env, attributes)
	        )

			( ("," | ";")  SetAttrValue(env, attributes)
		    )*
		"}"
    )
  	{
	  return attributes;
	}
}

Map SetAttrValue(Env env, Map attributes) : {
	Token attr = null;
	Token value = null;
	String str = null;
}   {
	    attr = <IDENTIFIER>
	    (    "=" (value = RelaxedStringToken() | value = NumberToken() )
	    )
	    { if (value != null) { str = value.image; }
	      if (attr != null && str != null)  {
	        if (attributes.containsKey(attr.image)) {
                throw new Error("Attribute already defined: " + attr.image);
	        }
	        attributes.put(attr.image, str);
	      }
     	  return attributes;
	    }
	}

Token NumberToken() : {
   Token value = null;
} {
  ( value = <NONNEGINT> | value = <INT> | value = <REAL> )
  { return value; }
}

Token IntegerToken() : {
   Token value = null;
} {
  ( value = <NONNEGINT> | value = <INT> )
  { return value; }
}

Token CmpToken() : {
   Token value = null;
} {
  ( value = <NUMCMP> | value = <STRREGEXCMP> )
  { return value; }
}

Token RelaxedStringToken() : {
   Token value = null;
} {
  ( value = <IDENTIFIER> | value = <STR> )
  { return value; }
}

String RelaxedString() : {
   Token value = null;
} {
  value = <STR> { return parseQuotedString(value.image); }
  | ( value = <IDENTIFIER> )
    { return value.image; }
}

/*
String VarName() : {
   Token value = null;
} {
  ( value =  )
    { return value.image; }
  | ( value =  )
    { return value.image; }
  | ( value =  )
    { return parseQuotedString(value.image); }
} */

String RelaxedStringNoIdentifier() : {
   Token value = null;
} {
  value = <STR> { return parseQuotedString(value.image); }
}
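
The productions above are compiled by JavaCC into the TokenSequenceParser class whose entry points appear in the PARSER_BEGIN section. Below is a minimal, hedged sketch of driving getExpressionExtractor directly; the class name, the rule string, and the assumption that 'sentence' is a CoreMap produced by a CoreNLP pipeline (as in the earlier sketch) are illustrative, not part of this file.

import edu.stanford.nlp.ling.tokensregex.CoreMapExpressionExtractor;
import edu.stanford.nlp.ling.tokensregex.Env;
import edu.stanford.nlp.ling.tokensregex.MatchedExpression;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser;
import edu.stanford.nlp.util.CoreMap;
import java.io.StringReader;
import java.util.List;

public class TokensRegexRuleDemo {
  // 'sentence' is assumed to be a CoreMap carrying a TokensAnnotation,
  // e.g. one sentence of a document annotated by a StanfordCoreNLP pipeline.
  public static void run(CoreMap sentence) throws Exception {
    Env env = TokenSequencePattern.getNewEnv();
    // One extraction rule in the syntax this grammar parses:
    // match a CD-tagged token followed by "dollar(s)" and label the span "MONEY".
    String rules =
        "{ ruleType: \"tokens\", pattern: ( [{tag:\"CD\"}] /dollars?/ ), result: \"MONEY\" }";
    CoreMapExpressionExtractor extractor =
        new TokenSequenceParser().getExpressionExtractor(env, new StringReader(rules));
    List<MatchedExpression> matches = extractor.extractExpressions(sentence);
    for (MatchedExpression m : matches) {
      System.out.println(m.getText() + " -> " + m.getValue());
    }
  }
}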



