All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.tokensregex.Env Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.ling.tokensregex.types.Tags;
import edu.stanford.nlp.pipeline.CoreMapAttributeAggregator;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Holds environment variables to be used for compiling string into a pattern.
 * Use {@link EnvLookup} to perform actual lookup (it will provide reasonable defaults)
 *
 * 

* Some of the types of variables to bind are: *

    *
  • SequencePattern (compiled pattern)
  • *
  • PatternExpr (sequence pattern expression - precompiled)
  • *
  • NodePattern (pattern for matching one element)
  • *
  • Class (binding of CoreMap attribute to java Class)
  • *
*

*/ public class Env { /** * Parser that converts a string into a SequencePattern. * @see edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser */ SequencePattern.Parser parser; /** * Mapping of variable names to their values */ Map variables = new HashMap();//Generics.newHashMap(); /** * Mapping of per thread temporary variables to their values */ ThreadLocal> threadLocalVariables = new ThreadLocal>(); /** * Mapping of variables that can be expanded in a regular expression for strings, * to their regular expressions. * The variable name must start with "$" and include only the alphanumeric characters * (it should follow the pattern $[A-Za-z0-9_]+). * Each variable is mapped to a pair, consisting of the Pattern representing * the name of the variable to be replaced, and a String representing the * regular expression (escaped) that is used to replace the name of the variable. */ Map> stringRegexVariables = new HashMap>();//Generics.newHashMap(); /** * Default parameters (used when reading in rules for {@link SequenceMatchRules}. */ public Map defaults = new HashMap();//Generics.newHashMap(); /** * Default flags to use for string regular expressions match * @see java.util.regex.Pattern#compile(String,int) */ public int defaultStringPatternFlags = 0; /** * Default flags to use for string literal match * @see NodePattern#CASE_INSENSITIVE */ public int defaultStringMatchFlags = 0; public Class sequenceMatchResultExtractor; public Class stringMatchResultExtractor; /** * Annotation key to use to getting tokens (default is CoreAnnotations.TokensAnnotation.class) */ public Class defaultTokensAnnotationKey; /** * Annotation key to use to getting text (default is CoreAnnotations.TextAnnotation.class) */ public Class defaultTextAnnotationKey; /** * List of keys indicating the per-token annotations (default is null). * If specified, each token will be annotated with the extracted results from the * {@link #defaultResultsAnnotationExtractor}. 
* If null, then individual tokens that are matched are not annotated. */ public List defaultTokensResultAnnotationKey; /** * List of keys indicating what fields should be annotated for the aggregated coremap. * If specified, the aggregated coremap is annotated with the extracted results from the * {@link #defaultResultsAnnotationExtractor}. * If null, then the aggregated coremap is not annotated. */ public List defaultResultAnnotationKey; /** * Annotation key to use during composite phase for storing matched sequences and to match against. */ public Class defaultNestedResultsAnnotationKey; /** * How should the tokens be aggregated when collapsing a sequence of tokens into one CoreMap */ public Map defaultTokensAggregators; /** * How annotations be extracted from the MatchedExpression * If the result type is a List and more than one annotation key is specified, * then the result is paired with the annotation key * Example: If annotation key is [ner,normalized] and result is [CITY,San Francisco] * then the final coremap will have ner=CITY, normalized=San Francisco * Otherwise, the result is treated as one object (all keys will be assigned that value). 
*/ Function defaultResultsAnnotationExtractor; /** * Interface for performing custom binding of values to the environment */ public static interface Binder { public void init(String prefix, Properties props); public void bind(Env env); } public Env(SequencePattern.Parser p) { this.parser = p; } public void initDefaultBindings() { bind("FALSE", Expressions.FALSE); bind("TRUE", Expressions.TRUE); bind("NIL", Expressions.NIL); bind("ENV", this); bind("tags", Tags.TagsAnnotation.class); } public Map getDefaults() { return defaults; } public void setDefaults(Map defaults) { this.defaults = defaults; } public Map getDefaultTokensAggregators() { return defaultTokensAggregators; } public void setDefaultTokensAggregators(Map defaultTokensAggregators) { this.defaultTokensAggregators = defaultTokensAggregators; } public Class getDefaultTextAnnotationKey() { return defaultTextAnnotationKey; } public void setDefaultTextAnnotationKey(Class defaultTextAnnotationKey) { this.defaultTextAnnotationKey = defaultTextAnnotationKey; } public Class getDefaultTokensAnnotationKey() { return defaultTokensAnnotationKey; } public void setDefaultTokensAnnotationKey(Class defaultTokensAnnotationKey) { this.defaultTokensAnnotationKey = defaultTokensAnnotationKey; } public List getDefaultTokensResultAnnotationKey() { return defaultTokensResultAnnotationKey; } public void setDefaultTokensResultAnnotationKey(Class... defaultTokensResultAnnotationKey) { this.defaultTokensResultAnnotationKey = Arrays.asList(defaultTokensResultAnnotationKey); } public void setDefaultTokensResultAnnotationKey(List defaultTokensResultAnnotationKey) { this.defaultTokensResultAnnotationKey = defaultTokensResultAnnotationKey; } public List getDefaultResultAnnotationKey() { return defaultResultAnnotationKey; } public void setDefaultResultAnnotationKey(Class... 
defaultResultAnnotationKey) { this.defaultResultAnnotationKey = Arrays.asList(defaultResultAnnotationKey); } public void setDefaultResultAnnotationKey(List defaultResultAnnotationKey) { this.defaultResultAnnotationKey = defaultResultAnnotationKey; } public Class getDefaultNestedResultsAnnotationKey() { return defaultNestedResultsAnnotationKey; } public void setDefaultNestedResultsAnnotationKey(Class defaultNestedResultsAnnotationKey) { this.defaultNestedResultsAnnotationKey = defaultNestedResultsAnnotationKey; } public Function getDefaultResultsAnnotationExtractor() { return defaultResultsAnnotationExtractor; } public void setDefaultResultsAnnotationExtractor(Function defaultResultsAnnotationExtractor) { this.defaultResultsAnnotationExtractor = defaultResultsAnnotationExtractor; } public Class getSequenceMatchResultExtractor() { return sequenceMatchResultExtractor; } public void setSequenceMatchResultExtractor(Class sequenceMatchResultExtractor) { this.sequenceMatchResultExtractor = sequenceMatchResultExtractor; } public Class getStringMatchResultExtractor() { return stringMatchResultExtractor; } public void setStringMatchResultExtractor(Class stringMatchResultExtractor) { this.stringMatchResultExtractor = stringMatchResultExtractor; } public Map getVariables() { return variables; } public void setVariables(Map variables) { this.variables = variables; } public void clearVariables() { this.variables.clear(); } public int getDefaultStringPatternFlags() { return defaultStringPatternFlags; } public void setDefaultStringPatternFlags(int defaultStringPatternFlags) { this.defaultStringPatternFlags = defaultStringPatternFlags; } public int getDefaultStringMatchFlags() { return defaultStringMatchFlags; } public void setDefaultStringMatchFlags(int defaultStringMatchFlags) { this.defaultStringMatchFlags = defaultStringMatchFlags; } private static final Pattern STRING_REGEX_VAR_NAME_PATTERN = Pattern.compile("\\$[A-Za-z0-9_]+"); public void bindStringRegex(String var, String 
regex) { // Enforce requirements on variable names ($alphanumeric_) if (!STRING_REGEX_VAR_NAME_PATTERN.matcher(var).matches()) { throw new IllegalArgumentException("StringRegex binding error: Invalid variable name " + var); } Pattern varPattern = Pattern.compile(Pattern.quote(var)); String replace = Matcher.quoteReplacement(regex); stringRegexVariables.put(var, new Pair(varPattern, replace)); } public String expandStringRegex(String regex) { // Replace all variables in regex String expanded = regex; for (String v:stringRegexVariables.keySet()) { Pair p = stringRegexVariables.get(v); expanded = p.first().matcher(expanded).replaceAll(p.second()); } return expanded; } public Pattern getStringPattern(String regex) { String expanded = expandStringRegex(regex); return Pattern.compile(expanded, defaultStringPatternFlags); } public void bind(String name, Object obj) { if (obj != null) { variables.put(name, obj); } else { variables.remove(name); } } public void bind(String name, SequencePattern pattern) { bind(name, pattern.getPatternExpr()); } public void unbind(String name) { bind(name, null); } public NodePattern getNodePattern(String name) { Object obj = variables.get(name); if (obj != null) { if (obj instanceof SequencePattern) { SequencePattern seqPattern = (SequencePattern) obj; if (seqPattern.getPatternExpr() instanceof SequencePattern.NodePatternExpr) { return ((SequencePattern.NodePatternExpr) seqPattern.getPatternExpr()).nodePattern; } else { throw new Error("Invalid node pattern class: " + seqPattern.getPatternExpr().getClass() + " for variable " + name); } } else if (obj instanceof SequencePattern.NodePatternExpr) { SequencePattern.NodePatternExpr pe = (SequencePattern.NodePatternExpr) obj; return pe.nodePattern; } else if (obj instanceof NodePattern) { return (NodePattern) obj; } else if (obj instanceof String) { try { SequencePattern.NodePatternExpr pe = (SequencePattern.NodePatternExpr) parser.parseNode(this, (String) obj); return pe.nodePattern; } catch 
(Exception pex) { throw new RuntimeException("Error parsing " + obj + " to node pattern", pex); } } else { throw new Error("Invalid node pattern variable class: " + obj.getClass() + " for variable " + name); } } return null; } public SequencePattern.PatternExpr getSequencePatternExpr(String name, boolean copy) { Object obj = variables.get(name); if (obj != null) { if (obj instanceof SequencePattern) { SequencePattern seqPattern = (SequencePattern) obj; return seqPattern.getPatternExpr(); } else if (obj instanceof SequencePattern.PatternExpr) { SequencePattern.PatternExpr pe = (SequencePattern.PatternExpr) obj; return (copy)? pe.copy():pe; } else if (obj instanceof NodePattern) { return new SequencePattern.NodePatternExpr( (NodePattern) obj); } else if (obj instanceof String) { try { return parser.parseSequence(this, (String) obj); } catch (Exception pex) { throw new RuntimeException("Error parsing " + obj + " to sequence pattern", pex); } } else { throw new Error("Invalid sequence pattern variable class: " + obj.getClass()); } } return null; } public Object get(String name) { return variables.get(name); } // Functions for storing temporary thread specific variables // that are used when running tokensregex public void push(String name, Object value) { Map vars = threadLocalVariables.get(); if (vars == null) { threadLocalVariables.set(vars = new HashMap());//Generics.newHashMap()); } Stack stack = (Stack) vars.get(name); if (stack == null) { vars.put(name, stack = new Stack()); } stack.push(value); } public Object pop(String name) { Map vars = threadLocalVariables.get(); if (vars == null) return null; Stack stack = (Stack) vars.get(name); if (stack == null || stack.isEmpty()) { return null; } else { return stack.pop(); } } public Object peek(String name) { Map vars = threadLocalVariables.get(); if (vars == null) return null; Stack stack = (Stack) vars.get(name); if (stack == null || stack.isEmpty()) { return null; } else { return stack.peek(); } } }