All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Downloading a project requires a lot of server resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
edu.stanford.nlp.ling.tokensregex.Env Maven / Gradle / Ivy
Go to download
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.ling.tokensregex.types.Tags;
import edu.stanford.nlp.pipeline.CoreMapAttributeAggregator;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Holds environment variables to be used for compiling string into a pattern.
* Use {@link EnvLookup} to perform actual lookup (it will provide reasonable defaults)
*
*
* Some of the types of variables to bind are:
*
* SequencePattern
(compiled pattern)
* PatternExpr
(sequence pattern expression - precompiled)
* NodePattern
(pattern for matching one element)
* Class
(binding of CoreMap attribute to java Class)
*
*
*/
public class Env {
/**
* Parser that converts a string into a SequencePattern.
* @see edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser
*/
SequencePattern.Parser parser;
/**
* Mapping of variable names to their values
*/
Map variables = new HashMap();//Generics.newHashMap();
/**
* Mapping of per thread temporary variables to their values
*/
ThreadLocal> threadLocalVariables = new ThreadLocal>();
/**
* Mapping of variables that can be expanded in a regular expression for strings,
* to their regular expressions.
* The variable name must start with "$" and include only the alphanumeric characters
* (it should follow the pattern $[A-Za-z0-9_]+
).
* Each variable is mapped to a pair, consisting of the Pattern
representing
* the name of the variable to be replaced, and a String
representing the
* regular expression (escaped) that is used to replace the name of the variable.
*/
Map> stringRegexVariables = new HashMap>();//Generics.newHashMap();
/**
* Default parameters (used when reading in rules for {@link SequenceMatchRules}.
*/
public Map defaults = new HashMap();//Generics.newHashMap();
/**
* Default flags to use for string regular expressions match
* @see java.util.regex.Pattern#compile(String,int)
*/
public int defaultStringPatternFlags = 0;
/**
* Default flags to use for string literal match
* @see NodePattern#CASE_INSENSITIVE
*/
public int defaultStringMatchFlags = 0;
public Class sequenceMatchResultExtractor;
public Class stringMatchResultExtractor;
/**
* Annotation key to use to getting tokens (default is CoreAnnotations.TokensAnnotation.class)
*/
public Class defaultTokensAnnotationKey;
/**
* Annotation key to use to getting text (default is CoreAnnotations.TextAnnotation.class)
*/
public Class defaultTextAnnotationKey;
/**
* List of keys indicating the per-token annotations (default is null).
* If specified, each token will be annotated with the extracted results from the
* {@link #defaultResultsAnnotationExtractor}.
* If null, then individual tokens that are matched are not annotated.
*/
public List defaultTokensResultAnnotationKey;
/**
* List of keys indicating what fields should be annotated for the aggregated coremap.
* If specified, the aggregated coremap is annotated with the extracted results from the
* {@link #defaultResultsAnnotationExtractor}.
* If null, then the aggregated coremap is not annotated.
*/
public List defaultResultAnnotationKey;
/**
* Annotation key to use during composite phase for storing matched sequences and to match against.
*/
public Class defaultNestedResultsAnnotationKey;
/**
* How should the tokens be aggregated when collapsing a sequence of tokens into one CoreMap
*/
public Map defaultTokensAggregators;
/**
* How annotations be extracted from the MatchedExpression
* If the result type is a List and more than one annotation key is specified,
* then the result is paired with the annotation key
* Example: If annotation key is [ner,normalized] and result is [CITY,San Francisco]
* then the final coremap will have ner=CITY, normalized=San Francisco
* Otherwise, the result is treated as one object (all keys will be assigned that value).
*/
Function defaultResultsAnnotationExtractor;
/**
* Interface for performing custom binding of values to the environment
*/
public static interface Binder {
public void init(String prefix, Properties props);
public void bind(Env env);
}
public Env(SequencePattern.Parser p) { this.parser = p; }
public void initDefaultBindings() {
bind("FALSE", Expressions.FALSE);
bind("TRUE", Expressions.TRUE);
bind("NIL", Expressions.NIL);
bind("ENV", this);
bind("tags", Tags.TagsAnnotation.class);
}
public Map getDefaults() {
return defaults;
}
public void setDefaults(Map defaults) {
this.defaults = defaults;
}
public Map getDefaultTokensAggregators() {
return defaultTokensAggregators;
}
public void setDefaultTokensAggregators(Map defaultTokensAggregators) {
this.defaultTokensAggregators = defaultTokensAggregators;
}
public Class getDefaultTextAnnotationKey() {
return defaultTextAnnotationKey;
}
public void setDefaultTextAnnotationKey(Class defaultTextAnnotationKey) {
this.defaultTextAnnotationKey = defaultTextAnnotationKey;
}
public Class getDefaultTokensAnnotationKey() {
return defaultTokensAnnotationKey;
}
public void setDefaultTokensAnnotationKey(Class defaultTokensAnnotationKey) {
this.defaultTokensAnnotationKey = defaultTokensAnnotationKey;
}
public List getDefaultTokensResultAnnotationKey() {
return defaultTokensResultAnnotationKey;
}
public void setDefaultTokensResultAnnotationKey(Class... defaultTokensResultAnnotationKey) {
this.defaultTokensResultAnnotationKey = Arrays.asList(defaultTokensResultAnnotationKey);
}
public void setDefaultTokensResultAnnotationKey(List defaultTokensResultAnnotationKey) {
this.defaultTokensResultAnnotationKey = defaultTokensResultAnnotationKey;
}
public List getDefaultResultAnnotationKey() {
return defaultResultAnnotationKey;
}
public void setDefaultResultAnnotationKey(Class... defaultResultAnnotationKey) {
this.defaultResultAnnotationKey = Arrays.asList(defaultResultAnnotationKey);
}
public void setDefaultResultAnnotationKey(List defaultResultAnnotationKey) {
this.defaultResultAnnotationKey = defaultResultAnnotationKey;
}
public Class getDefaultNestedResultsAnnotationKey() {
return defaultNestedResultsAnnotationKey;
}
public void setDefaultNestedResultsAnnotationKey(Class defaultNestedResultsAnnotationKey) {
this.defaultNestedResultsAnnotationKey = defaultNestedResultsAnnotationKey;
}
public Function getDefaultResultsAnnotationExtractor() {
return defaultResultsAnnotationExtractor;
}
public void setDefaultResultsAnnotationExtractor(Function defaultResultsAnnotationExtractor) {
this.defaultResultsAnnotationExtractor = defaultResultsAnnotationExtractor;
}
public Class getSequenceMatchResultExtractor() {
return sequenceMatchResultExtractor;
}
public void setSequenceMatchResultExtractor(Class sequenceMatchResultExtractor) {
this.sequenceMatchResultExtractor = sequenceMatchResultExtractor;
}
public Class getStringMatchResultExtractor() {
return stringMatchResultExtractor;
}
public void setStringMatchResultExtractor(Class stringMatchResultExtractor) {
this.stringMatchResultExtractor = stringMatchResultExtractor;
}
public Map getVariables() {
return variables;
}
public void setVariables(Map variables) {
this.variables = variables;
}
public void clearVariables() {
this.variables.clear();
}
public int getDefaultStringPatternFlags() {
return defaultStringPatternFlags;
}
public void setDefaultStringPatternFlags(int defaultStringPatternFlags) {
this.defaultStringPatternFlags = defaultStringPatternFlags;
}
public int getDefaultStringMatchFlags() {
return defaultStringMatchFlags;
}
public void setDefaultStringMatchFlags(int defaultStringMatchFlags) {
this.defaultStringMatchFlags = defaultStringMatchFlags;
}
private static final Pattern STRING_REGEX_VAR_NAME_PATTERN = Pattern.compile("\\$[A-Za-z0-9_]+");
public void bindStringRegex(String var, String regex)
{
// Enforce requirements on variable names ($alphanumeric_)
if (!STRING_REGEX_VAR_NAME_PATTERN.matcher(var).matches()) {
throw new IllegalArgumentException("StringRegex binding error: Invalid variable name " + var);
}
Pattern varPattern = Pattern.compile(Pattern.quote(var));
String replace = Matcher.quoteReplacement(regex);
stringRegexVariables.put(var, new Pair(varPattern, replace));
}
public String expandStringRegex(String regex)
{
// Replace all variables in regex
String expanded = regex;
for (String v:stringRegexVariables.keySet()) {
Pair p = stringRegexVariables.get(v);
expanded = p.first().matcher(expanded).replaceAll(p.second());
}
return expanded;
}
public Pattern getStringPattern(String regex)
{
String expanded = expandStringRegex(regex);
return Pattern.compile(expanded, defaultStringPatternFlags);
}
public void bind(String name, Object obj) {
if (obj != null) {
variables.put(name, obj);
} else {
variables.remove(name);
}
}
public void bind(String name, SequencePattern pattern) {
bind(name, pattern.getPatternExpr());
}
public void unbind(String name) {
bind(name, null);
}
public NodePattern getNodePattern(String name)
{
Object obj = variables.get(name);
if (obj != null) {
if (obj instanceof SequencePattern) {
SequencePattern seqPattern = (SequencePattern) obj;
if (seqPattern.getPatternExpr() instanceof SequencePattern.NodePatternExpr) {
return ((SequencePattern.NodePatternExpr) seqPattern.getPatternExpr()).nodePattern;
} else {
throw new Error("Invalid node pattern class: " + seqPattern.getPatternExpr().getClass() + " for variable " + name);
}
} else if (obj instanceof SequencePattern.NodePatternExpr) {
SequencePattern.NodePatternExpr pe = (SequencePattern.NodePatternExpr) obj;
return pe.nodePattern;
} else if (obj instanceof NodePattern) {
return (NodePattern) obj;
} else if (obj instanceof String) {
try {
SequencePattern.NodePatternExpr pe = (SequencePattern.NodePatternExpr) parser.parseNode(this, (String) obj);
return pe.nodePattern;
} catch (Exception pex) {
throw new RuntimeException("Error parsing " + obj + " to node pattern", pex);
}
} else {
throw new Error("Invalid node pattern variable class: " + obj.getClass() + " for variable " + name);
}
}
return null;
}
public SequencePattern.PatternExpr getSequencePatternExpr(String name, boolean copy)
{
Object obj = variables.get(name);
if (obj != null) {
if (obj instanceof SequencePattern) {
SequencePattern seqPattern = (SequencePattern) obj;
return seqPattern.getPatternExpr();
} else if (obj instanceof SequencePattern.PatternExpr) {
SequencePattern.PatternExpr pe = (SequencePattern.PatternExpr) obj;
return (copy)? pe.copy():pe;
} else if (obj instanceof NodePattern) {
return new SequencePattern.NodePatternExpr( (NodePattern) obj);
} else if (obj instanceof String) {
try {
return parser.parseSequence(this, (String) obj);
} catch (Exception pex) {
throw new RuntimeException("Error parsing " + obj + " to sequence pattern", pex);
}
} else {
throw new Error("Invalid sequence pattern variable class: " + obj.getClass());
}
}
return null;
}
public Object get(String name)
{
return variables.get(name);
}
// Functions for storing temporary thread specific variables
// that are used when running tokensregex
public void push(String name, Object value) {
Map vars = threadLocalVariables.get();
if (vars == null) {
threadLocalVariables.set(vars = new HashMap());//Generics.newHashMap());
}
Stack stack = (Stack) vars.get(name);
if (stack == null) {
vars.put(name, stack = new Stack());
}
stack.push(value);
}
public Object pop(String name) {
Map vars = threadLocalVariables.get();
if (vars == null) return null;
Stack stack = (Stack) vars.get(name);
if (stack == null || stack.isEmpty()) {
return null;
} else {
return stack.pop();
}
}
public Object peek(String name) {
Map vars = threadLocalVariables.get();
if (vars == null) return null;
Stack stack = (Stack) vars.get(name);
if (stack == null || stack.isEmpty()) {
return null;
} else {
return stack.peek();
}
}
}