All Downloads are FREE. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.ling.tokensregex.MultiPatternMatcher Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.util.*;

import java.util.*;
import java.util.function.Function;

/**
 * Matcher that takes in multiple patterns
 *
 * @author Angel Chang
 */
public class MultiPatternMatcher {
  Collection> patterns;
  SequencePatternTrigger patternTrigger;

  public MultiPatternMatcher(SequencePatternTrigger patternTrigger,
                             Collection> patterns)
  {
    this.patterns = new ArrayList>();
    this.patterns.addAll(patterns);
    this.patternTrigger = patternTrigger;
  }

  public MultiPatternMatcher(SequencePatternTrigger patternTrigger,
                             SequencePattern... patterns)
  {
    this(patterns);
    this.patternTrigger = patternTrigger;
  }

  public MultiPatternMatcher(Collection> patterns)
  {
    this.patterns = patterns;
  }

  public MultiPatternMatcher(SequencePattern... patterns)
  {
    this.patterns = new ArrayList>(patterns.length);
    for (SequencePattern p:patterns) {
      this.patterns.add(p);
    }
  }

  /**
   * Given a sequence, applies our patterns over the sequence and returns
   *   all non overlapping matches.  When multiple patterns overlaps,
   *   matched patterns are selected by
   *     the highest priority/score is selected,
   *     then the longest pattern,
   *     then the starting offset,
   *     then the original order
   * @param elements input sequence to match against
   * @return list of match results that are non-overlapping
   */
  public List> findNonOverlapping(List elements)
  {
    return findNonOverlapping(elements, SequenceMatchResult.DEFAULT_COMPARATOR);
  }

  /**
   * Given a sequence, applies our patterns over the sequence and returns
   *   all non overlapping matches.  When multiple patterns overlaps,
   *   matched patterns are selected by order specified by the comparator
   * @param elements input sequence to match against
   * @param cmp comparator indicating order that overlapped sequences should be selected.
   * @return list of match results that are non-overlapping
   */
  public List> findNonOverlapping(List elements,
                                                         Comparator cmp)
  {
    Collection> triggered = getTriggeredPatterns(elements);
    List> all = new ArrayList>();
    int i = 0;
    for (SequencePattern p:triggered) {
      SequenceMatcher m = p.getMatcher(elements);
      m.setOrder(i);
      while (m.find()) {
        all.add(m.toBasicSequenceMatchResult());
      }
      i++;
    }
    List> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, cmp);
    Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);

    return res;
  }

  /**
   * Given a sequence, applies our patterns over the sequence and returns
   *   all matches, depending on the findType.  When multiple patterns overlaps,
   *   matched patterns are selected by order specified by the comparator
   * @param elements input sequence to match against
   * @param findType whether FindType.FIND_ALL or FindType.FIND_NONOVERLAPPING
   * @return list of match results
   */
  public List> find(List elements, SequenceMatcher.FindType findType)
  {
    Collection> triggered = getTriggeredPatterns(elements);
    List> all = new ArrayList>();
    int i = 0;
    for (SequencePattern p:triggered) {
      SequenceMatcher m = p.getMatcher(elements);
      m.setFindType(findType);
      m.setOrder(i);
      while (m.find()) {
        all.add(m.toBasicSequenceMatchResult());
      }
      i++;
    }
    List> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, SequenceMatchResult.DEFAULT_COMPARATOR);
    Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);

    return res;
  }



  /**
   * Given a sequence, applies our patterns over the sequence and returns
   *   all non overlapping matches.  When multiple patterns overlaps,
   *   matched patterns are selected to give the overall maximum score
   * @param elements input sequence to match against
   * @return list of match results that are non-overlapping
   */
  public List> findNonOverlappingMaxScore(List elements)
  {
    return findNonOverlappingMaxScore(elements, SequenceMatchResult.SCORER);
  }

  /**
   * Given a sequence, applies our patterns over the sequence and returns
   *   all non overlapping matches.  When multiple patterns overlaps,
   *   matched patterns are selected to give the overall maximum score
   * @param elements input sequence to match against
   * @param scorer scorer for scoring each match
   * @return list of match results that are non-overlapping
   */
  public List> findNonOverlappingMaxScore(List elements,
                                                                 Function scorer)
  {
    Collection> triggered = getTriggeredPatterns(elements);
    List> all = new ArrayList>();
    int i = 0;
    for (SequencePattern p:triggered) {
      SequenceMatcher m = p.getMatcher(elements);
      m.setOrder(i);
      while (m.find()) {
        all.add(m.toBasicSequenceMatchResult());
      }
      i++;
    }
    List> res = IntervalTree.getNonOverlappingMaxScore( all, SequenceMatchResult.TO_INTERVAL, scorer);
    Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);

    return res;
  }

  /**
   * Given a sequence, applies each of our patterns over the sequence and returns
   *   all non overlapping matches for each of the patterns.
   * Unlike #findAllNonOverlapping, overlapping matches from different patterns are kept
   * @param elements input sequence to match against
   * @return iterable of match results that are non-overlapping
   */
  public Iterable> findAllNonOverlappingMatchesPerPattern(List elements)
  {
    Collection> triggered = getTriggeredPatterns(elements);
    List>> allMatches = new ArrayList>>(elements.size());
    for (SequencePattern p:triggered) {
      Iterable> matches = p.getMatcher(elements).findAllNonOverlapping();
      allMatches.add(matches);
    }
    return Iterables.chain(allMatches);
  }

  /**
   * Given a sequence, return the collection of patterns that are triggered by the sequence
   *   (these patterns are the ones that may potentially match a subsequence in the sequence)
   * @param elements Input sequence
   * @return Collection of triggered patterns
   */
  public Collection> getTriggeredPatterns(List elements) {
    if (patternTrigger != null) {
      return patternTrigger.apply(elements);
    } else {
      return patterns;
    }
  }

  /** Interfaces for optimizing application of many SequencePatterns over a particular sequence */

  /**
   * A function which returns a collections of patterns that may match when
   *   given a single node from a larger sequence.
   * @param 
   */
  public static interface NodePatternTrigger extends Function>> {}

  /**
   * A function which returns a collections of patterns that may match when
   *   a sequence of nodes.  Note that this function needs to be conservative
   *   and should return ALL patterns that may match.
   * @param 
   */
  public static interface SequencePatternTrigger extends Function, Collection>> {}

  /**
   * Simple SequencePatternTrigger that looks at each node, and identifies which
   *   patterns may potentially match each node, and then aggregates (union)
   *   all these patterns together.  Original ordering of patterns is preserved.
   * @param 
   */
  public static class BasicSequencePatternTrigger implements SequencePatternTrigger {
    NodePatternTrigger trigger;

    public BasicSequencePatternTrigger(NodePatternTrigger trigger) {
      this.trigger = trigger;
    }

    @Override
    public Collection> apply(List elements) {
      // Use LinkedHashSet to preserve original ordering of patterns.
      Set> triggeredPatterns = new LinkedHashSet>();
      for (T node:elements) {
        Collection> triggered = trigger.apply(node);
        triggeredPatterns.addAll(triggered);
      }
      return triggeredPatterns;
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy