/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * edu.stanford.nlp.trees.Dependencies Maven / Gradle / Ivy
 *
 * Go to download
 *
 * Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
 *
 * The newest version!
 */
package edu.stanford.nlp.trees; 
import edu.stanford.nlp.util.logging.Redwood;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.ling.HasIndex;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import java.util.function.Predicate;
import edu.stanford.nlp.util.Generics;

/** Utilities for Dependency objects.
 *
 *  @author Christopher Manning
 */
public class Dependencies  {

  /** A logger for this class. Never reassigned, hence {@code final}. */
  private static final Redwood.RedwoodChannels log = Redwood.channels(Dependencies.class);

  /** Not instantiable: this class only exposes static members and nested types. */
  private Dependencies() {} // only static methods


  public static class DependentPuncTagRejectFilter implements Predicate>, Serializable {

    private Predicate tagRejectFilter;

    public DependentPuncTagRejectFilter(Predicate trf) {
      tagRejectFilter = trf;
    }

    @Override
    public boolean test(Dependency d) {
      /*
      log.info("DRF: Checking " + d + ": hasTag?: " +
                         (d.dependent() instanceof HasTag) + "; value: " +
                         ((d.dependent() instanceof HasTag)? ((HasTag) d.dependent()).tag(): null));
      */
      if (d == null) {
        return false;
      }
      if ( ! (d.dependent() instanceof HasTag)) {
        return false;
      }
      String tag = ((HasTag) d.dependent()).tag();
      return tagRejectFilter.test(tag);
    }

    private static final long serialVersionUID = -7732189363171164852L;

  } // end class DependentPuncTagRejectFilter


  public static class DependentPuncWordRejectFilter implements Predicate>, Serializable {

    /**
     *
     */
    private static final long serialVersionUID = 1166489968248785287L;
    private final Predicate wordRejectFilter;

    /** @param wrf A filter that rejects punctuation words.
     */
    public DependentPuncWordRejectFilter(Predicate wrf) {
      // log.info("wrf is " + wrf);
      wordRejectFilter = wrf;
    }

    @Override
    public boolean test(Dependency d) {
      /*
      log.info("DRF: Checking " + d + ": hasWord?: " +
                         (d.dependent() instanceof HasWord) + "; value: " +
                         ((d.dependent() instanceof HasWord)? ((HasWord) d.dependent()).word(): d.dependent().value()));
      */
      if (d == null) {
        return false;
      }
      String word = null;
      if (d.dependent() instanceof HasWord) {
        word = ((HasWord) d.dependent()).word();
      }
      if (word == null) {
        word = d.dependent().value();
      }
      // log.info("Dep: kid is " + ((MapLabel) d.dependent()).toString("value{map}"));
      return wordRejectFilter.test(word);
    }

  } // end class DependentPuncWordRejectFilter


  // extra class guarantees correct lazy loading (Bloch p.194)
  /**
   * Holder for the shared dependency comparator, so that the instance is only
   * created when this nested class is first used.
   */
  private static class ComparatorHolder {

    private ComparatorHolder() {}

    /**
     * Orders dependencies by the {@code index()} of their dependents.
     * Both dependents are cast to {@link HasIndex}, so comparing a dependency
     * whose dependent does not implement it throws a {@code ClassCastException}.
     * {@code Dependency} is used as a raw type here because the comparator
     * never looks at its type arguments.
     */
    private static class DependencyIdxComparator implements Comparator<Dependency> {

      @Override
      public int compare(Dependency dep1, Dependency dep2) {
        HasIndex dep1lab = (HasIndex) dep1.dependent();
        HasIndex dep2lab = (HasIndex) dep2.dependent();
        // Integer.compare rather than subtraction: the difference of two ints
        // can overflow and flip sign.
        return Integer.compare(dep1lab.index(), dep2lab.index());
      }

    }

    /** The single shared comparator instance. */
    private static final Comparator<Dependency> dc = new DependencyIdxComparator();

  }

  public static Map> govToDepMap(List deps) {
    Map> govToDepMap = Generics.newHashMap();
    for (TypedDependency dep : deps) {
      IndexedWord gov = dep.gov();

      List depList = govToDepMap.get(gov);
      if (depList == null) {
        depList = new ArrayList<>();
        govToDepMap.put(gov, depList);
      }
      depList.add(dep);
    }
    return govToDepMap;
  }

  private static Set> getGovMaxChains(Map> govToDepMap, IndexedWord gov, int depth) {
    Set> depLists = Generics.newHashSet();
    List children = govToDepMap.get(gov);

    if (depth > 0 && children != null) {
      for (TypedDependency child : children) {
        IndexedWord childNode = child.dep();
        if (childNode == null) continue;
        Set> childDepLists = getGovMaxChains(govToDepMap, childNode, depth-1);
        if (childDepLists.size() != 0) {
          for (List childDepList : childDepLists) {
            List depList = new ArrayList<>(childDepList.size() + 1);
            depList.add(child);
            depList.addAll(childDepList);
            depLists.add(depList);
          }
        } else {
          depLists.add(Arrays.asList(child));
        }
      }
    }
    return depLists;
  }

  public static Counter> getTypedDependencyChains(List deps, int maxLength) {
    Map> govToDepMap = govToDepMap(deps);
    Counter> tdc = new ClassicCounter<>();
    for (IndexedWord gov : govToDepMap.keySet()) {
      Set> maxChains = getGovMaxChains(govToDepMap, gov, maxLength);
      for (List maxChain : maxChains) {
         for (int i = 1; i <= maxChain.size(); i++) {
           List chain = maxChain.subList(0, i);
           tdc.incrementCount(chain);
         }
      }
    }
    return tdc;
  }

  /** A Comparator for Dependencies based on their dependent annotation.
   *  It will only work if the Labels at the ends of Dependencies have
   *  an index(): the dependents are cast to {@link HasIndex}, so a
   *  dependent without one makes the comparison throw a
   *  {@code ClassCastException}. The same comparator instance is returned
   *  on every call.
   *
   *  @return A Comparator for Dependencies
   */
  public static Comparator<Dependency> dependencyIndexComparator() {
    return ComparatorHolder.dc;
  }

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy