All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.tokensregex.SequenceMatchResult Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.util.Comparators;
import java.util.function.Function;
import edu.stanford.nlp.util.HasInterval;
import edu.stanford.nlp.util.Interval;

import java.util.Comparator;
import java.util.List;
import java.util.regex.MatchResult;

/**
 * The result of a match against a sequence.
 * 

* Similar to Java's {@link MatchResult} except it is for sequences * over arbitrary types T instead of just characters. *

*

This interface contains query methods used to determine the * results of a match against a regular expression against an sequence. * The match boundaries, groups and group boundaries can be seen * but not modified through a {@link SequenceMatchResult}. * * @author Angel Chang * @see SequenceMatcher */ public interface SequenceMatchResult extends MatchResult, HasInterval { // TODO: Need to be careful with GROUP_BEFORE_MATCH/GROUP_AFTER_MATCH public static int GROUP_BEFORE_MATCH = Integer.MIN_VALUE; // Special match groups (before match) public static int GROUP_AFTER_MATCH = Integer.MIN_VALUE+1; // Special match groups (after match) public double score(); public double priority(); /** * Returns the original sequence the match was performed on. * @return The list that the match was performed on */ public List elements(); /** * Returns pattern used to create this sequence match result * @return the SequencePattern against which this sequence match result was matched */ public SequencePattern pattern(); /** * Returns the entire matched subsequence as a list. * @return the matched subsequence as a list * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public List groupNodes(); /** * Returns the matched group as a list. * @param group * The index of a capturing group in this matcher's pattern * @return the matched group as a list * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given index */ public List groupNodes(int group); public BasicSequenceMatchResult toBasicSequenceMatchResult(); // String lookup versions using variables /** * Returns the matched group as a list. 
* @param groupVar * The name of the capturing group in this matcher's pattern * @return the matched group as a list * or null if there is no capturing group in the pattern * with the given name * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public List groupNodes(String groupVar); /** * Returns the String representing the matched group. * @param groupVar * The name of the capturing group in this matcher's pattern * @return the matched group as a String * or null if there is no capturing group in the pattern * with the given name * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public String group(String groupVar); /** * Returns the start index of the subsequence captured by the given group * during this match. * @param groupVar * The name of the capturing group in this matcher's pattern * @return the index of the first element captured by the group, * or -1 if the match was successful but the group * itself did not match anything * or if there is no capturing group in the pattern * with the given name * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public int start(String groupVar); /** * Returns the index of the next element after the subsequence captured by the given group * during this match. * @param groupVar * The name of the capturing group in this matcher's pattern * @return the index of the next element after the subsequence captured by the group, * or -1 if the match was successful but the group * itself did not match anything * or if there is no capturing group in the pattern * with the given name * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public int end(String groupVar); public int getOrder(); /** * Returns an Object representing the result for the match for a particular node. 
* (actual Object returned depends on the type T of the nodes. For instance, * for a CoreMap, the match result is returned as a {@code Map}, while * for String, the match result is typically a MatchResult. * * @param index The index of the element in the original sequence. * @return The match result associated with the node at the given index. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * @throws IndexOutOfBoundsException * If the index is out of range */ public Object nodeMatchResult(int index); /** * Returns an Object representing the result for the match for a particular node in a group. * (actual Object returned depends on the type T of the nodes. For instance, * for a CoreMap, the match result is returned as a {@code Map}, while * for String, the match result is typically a MatchResult. * * @param groupid The index of a capturing group in this matcher's pattern * @param index The index of the element in the captured subsequence. * @return the match result associated with the node * at the given index for the captured group. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given groupid or if the index is out of range */ public Object groupMatchResult(int groupid, int index); /** * Returns an Object representing the result for the match for a particular node in a group. * (actual Object returned depends on the type T of the nodes. For instance, * for a CoreMap, the match result is returned as a {@code Map}, while * for String, the match result is typically a MatchResult. * * @param groupVar The name of the capturing group in this matcher's pattern * @param index The index of the element in the captured subsequence. * @return the match result associated with the node * at the given index for the captured group. 
* @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * @throws IndexOutOfBoundsException * if the index is out of range */ public Object groupMatchResult(String groupVar, int index); /** * Returns a list of Objects representing the match results for the entire sequence. * * @return the list of match results associated with the entire sequence * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public List groupMatchResults(); /** * Returns a list of Objects representing the match results for the nodes in the group. * * @param group * The index of a capturing group in this matcher's pattern * @return the list of match results associated with the nodes * for the captured group. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given index */ public List groupMatchResults(int group); /** * Returns a list of Objects representing the match results for the nodes in the group. * * @param groupVar * The name of the capturing group in this matcher's pattern * @return the list of match results associated with the nodes * for the captured group. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public List groupMatchResults(String groupVar); /** * Returns the value (some Object) associated with the entire matched sequence. * * @return value associated with the matched sequence. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public Object groupValue(); /** * Returns the value (some Object) associated with the captured group. * * @param group * The index of a capturing group in this matcher's pattern * @return value associated with the captured group. 
* @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public Object groupValue(int group); /** * Returns the value (some Object) associated with the captured group. * * @param var * The name of the capturing group in this matcher's pattern * @return value associated with the captured group. * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public Object groupValue(String var); public MatchedGroupInfo groupInfo(); public MatchedGroupInfo groupInfo(int group); public MatchedGroupInfo groupInfo(String var); public static final GroupToIntervalFunc TO_INTERVAL = new GroupToIntervalFunc(0); public static class GroupToIntervalFunc implements Function> { int group; public GroupToIntervalFunc(int group) { this.group = group; } public Interval apply(MR in) { return Interval.toInterval(in.start(group), in.end(group), Interval.INTERVAL_OPEN_END); } } public final static Comparator PRIORITY_COMPARATOR = (e1, e2) -> { double s1 = 0; if (e1 instanceof SequenceMatchResult) { s1 = ((SequenceMatchResult) e1).priority(); } double s2 = 0; if (e2 instanceof SequenceMatchResult) { s2 = ((SequenceMatchResult) e2).priority(); } if (s1 == s2) { return 0; } else { return (s1 > s2)? -1:1; } }; public final static Comparator SCORE_COMPARATOR = (e1, e2) -> { double s1 = 0; if (e1 instanceof SequenceMatchResult) { s1 = ((SequenceMatchResult) e1).score(); } double s2 = 0; if (e2 instanceof SequenceMatchResult) { s2 = ((SequenceMatchResult) e2).score(); } if (s1 == s2) { return 0; } else { return (s1 > s2)? -1:1; } }; public final static Comparator ORDER_COMPARATOR = (e1, e2) -> { int o1 = 0; if (e1 instanceof SequenceMatchResult) {o1 = ((SequenceMatchResult) e1).getOrder(); } int o2 = 0; if (e2 instanceof SequenceMatchResult) {o2 = ((SequenceMatchResult) e2).getOrder(); } if (o1 == o2) { return 0; } else { return (o1 < o2)? -1:1; } }; // Compares two match results. 
// Use to order match results by: // length (longest first), public final static Comparator LENGTH_COMPARATOR = (e1, e2) -> { int len1 = e1.end() - e1.start(); int len2 = e2.end() - e2.start(); if (len1 == len2) { return 0; } else { return (len1 > len2)? -1:1; } }; public final static Comparator OFFSET_COMPARATOR = (e1, e2) -> { if (e1.start() == e2.start()) { if (e1.end() == e2.end()) { return 0; } else { return (e1.end() < e2.end())? -1:1; } } else { return (e1.start() < e2.start())? -1:1; } }; // Compares two match results. // Use to order match results by: // priority (highest first), // score (highest first), // length (longest first), // and then beginning token offset (smaller offset first) // original order (smaller first) public final static Comparator PRIORITY_SCORE_LENGTH_ORDER_OFFSET_COMPARATOR = Comparators.chain(PRIORITY_COMPARATOR, SCORE_COMPARATOR, LENGTH_COMPARATOR, ORDER_COMPARATOR, OFFSET_COMPARATOR); public final static Comparator DEFAULT_COMPARATOR = PRIORITY_SCORE_LENGTH_ORDER_OFFSET_COMPARATOR; public final static Function SCORER = in -> { if (in instanceof SequenceMatchResult) { return ((SequenceMatchResult) in).score(); } else return 0.0; }; /** * Information about a matched group * @param */ public final static class MatchedGroupInfo { public final String text; public final List nodes; public final List matchResults; public final Object value; public final String varName; public MatchedGroupInfo(String text, List nodes, List matchResults, Object value, String varName) { this.text = text; this.nodes = nodes; this.matchResults = matchResults; this.value = value; this.varName = varName; } } }