edu.stanford.nlp.trees.GrammaticalStructure Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.trees;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.concurrent.locks.Lock;
import java.util.function.Predicate;
import java.util.function.Function;

import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.AbstractCoreLabel;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WhitespaceTokenizer;
import edu.stanford.nlp.util.*;

import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT;
import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;


/**
 * A {@code GrammaticalStructure} stores dependency relations between
 * nodes in a tree.  A new GrammaticalStructure is constructed
 * from an existing parse tree with the help of {@link
 * GrammaticalRelation GrammaticalRelation}, which
 * defines a hierarchy of grammatical relations, along with
 * patterns for identifying them in parse trees.  The constructor for
 * GrammaticalStructure uses these definitions to
 * populate the new GrammaticalStructure with as many
 * labeled grammatical relations as it can.  Once constructed, the new
 * GrammaticalStructure can be printed in various
 * formats, or interrogated using the interface methods in this
 * class. Internally, this uses a representation via a {@code TreeGraphNode},
 * that is, a tree with additional labeled
 * arcs between nodes, for representing the grammatical relations in a
 * parse tree.
 * 
 * Caveat emptor! This is a work in progress.
 * Nothing in here should be relied upon to function perfectly.
 * Feedback welcome.
 *
 * @author Bill MacCartney
 * @author Galen Andrew (refactoring English-specific stuff)
 * @author Ilya Sherman (dependencies)
 * @author Daniel Cer
 * @see EnglishGrammaticalRelations
 * @see GrammaticalRelation
 * @see EnglishGrammaticalStructure
 */
public abstract class GrammaticalStructure implements Serializable {

  private static final boolean PRINT_DEBUGGING = System.getProperty("GrammaticalStructure", null) != null;

  /**
   * A specification for the types of extra edges to add to the dependency tree.
   * If you're in doubt, use {@link edu.stanford.nlp.trees.GrammaticalStructure.Extras#NONE}.
   */
  public static enum Extras {
    /**
     * 
 Don't include any additional edges. 
     * 
     *   Note: In older code (2014 and before) including extras was a boolean flag. This option is the equivalent of
     *   the false flag.
     * 
     */
    NONE(false, false, false),
    /**
     * Include only the extra reference edges, and save them as reference edges without collapsing.
     */
    REF_ONLY_UNCOLLAPSED(true, false, false),
    /**
     * Include only the extra reference edges, but collapsing these edges to clone the edge type of the referent.
     * So, for example, My dog who eats sausage may have a "ref" edge from who to dog
     * that would be deleted and replaced with an "nsubj" edge from eats to dog.
     */
    REF_ONLY_COLLAPSED(true, false, true),
    /**
     * Add extra subjects only, not adding any of the other extra edge types.
     */
    SUBJ_ONLY(false, true, false),
    /**
     * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#SUBJ_ONLY
     * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_ONLY_UNCOLLAPSED
     */
    REF_UNCOLLAPSED_AND_SUBJ(true, true, false),
    /**
     * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#SUBJ_ONLY
     * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_ONLY_COLLAPSED
     */
    REF_COLLAPSED_AND_SUBJ(true, true, true),
    /**
     * 
     *   Do the maximal amount of extra processing.
     *   Currently, this is equivalent to {@link edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_COLLAPSED_AND_SUBJ}.
     * 
     * 
     *   Note: In older code (2014 and before) including extras was a boolean flag. This option is the equivalent of
     *   the true flag.
     * 
     */
    MAXIMAL(true, true, true);

    /** Add "ref" edges */
    public final boolean doRef;
    /** Add extra subject edges */
    public final boolean doSubj;
    /** collapse the "ref" edges */
    public final boolean collapseRef;

    /** Constructor. Nothing exciting here. */
    Extras(boolean doRef, boolean doSubj, boolean collapseRef) {
      this.doRef = doRef;
      this.doSubj = doSubj;
      this.collapseRef = collapseRef;
    }

  }

  protected final List typedDependencies;
  protected final List allTypedDependencies;

  protected final Predicate puncFilter;

  /**
   * The root Tree node for this GrammaticalStructure.
   */
  protected final TreeGraphNode root;

  /**
   * A map from arbitrary integer indices to nodes.
   */
  private final Map indexMap = Generics.newHashMap();

  /**
   * Create a new GrammaticalStructure, analyzing the parse tree and
   * populate the GrammaticalStructure with as many labeled
   * grammatical relation arcs as possible.
   *
   * @param t             A Tree to analyze
   * @param relations     A set of GrammaticalRelations to consider
   * @param relationsLock Something needed to make this thread-safe
   * @param transformer   A transformer to apply to the tree before converting
   * @param hf            A HeadFinder for analysis
   * @param puncFilter    A Filter to reject punctuation. To delete punctuation
   *                      dependencies, this filter should return false on
   *                      punctuation word strings, and true otherwise.
   *                      If punctuation dependencies should be kept, you
   *                      should pass in a Filters.<String>acceptFilter().
   */
  public GrammaticalStructure(Tree t, Collection relations,
                              Lock relationsLock, TreeTransformer transformer,
                              HeadFinder hf, Predicate puncFilter) {
    TreeGraphNode treegraph = new TreeGraphNode(t, (TreeGraphNode) null);
    // TODO: create the tree and reuse the leaf labels in one pass,
    // avoiding a wasteful copy of the labels.
    Trees.setLeafLabels(treegraph, t.yield());
    Trees.setLeafTagsIfUnset(treegraph);
    if (transformer != null) {
      Tree transformed = transformer.transformTree(treegraph);
      if (!(transformed instanceof TreeGraphNode)) {
        throw new RuntimeException("Transformer did not change TreeGraphNode into another TreeGraphNode: " + transformer);
      }
      this.root = (TreeGraphNode) transformed;
    } else {
      this.root = treegraph;
    }
    indexNodes(this.root);
    // add head word and tag to phrase nodes
    if (hf == null) {
      throw new AssertionError("Cannot use null HeadFinder");
    }
    root.percolateHeads(hf);
    if (root.value() == null) {
      root.setValue("ROOT");  // todo: cdm: it doesn't seem like this line should be here
    }
    // add dependencies, using heads
    this.puncFilter = puncFilter;
    // NoPunctFilter puncDepFilter = new NoPunctFilter(puncFilter);
    NoPunctTypedDependencyFilter puncTypedDepFilter = new NoPunctTypedDependencyFilter(puncFilter);

    DirectedMultiGraph basicGraph = new DirectedMultiGraph();
    DirectedMultiGraph completeGraph = new DirectedMultiGraph();

    // analyze the root (and its descendants, recursively)
    if (relationsLock != null) {
      relationsLock.lock();
    }
    try {
      analyzeNode(root, root, relations, hf, puncFilter, basicGraph, completeGraph);
    }
    finally {
      if (relationsLock != null) {
        relationsLock.unlock();
      }
    }

    attachStrandedNodes(root, root, false, puncFilter, basicGraph);

    // add typed dependencies
    typedDependencies = getDeps(puncTypedDepFilter, basicGraph);
    allTypedDependencies = Generics.newArrayList(typedDependencies);
    getExtraDeps(allTypedDependencies, puncTypedDepFilter, completeGraph);
  }


  /**
   * Assign sequential integer indices (starting with 1) to all
   * nodes of the subtree rooted at this
   * Tree.  The leaves are indexed first,
   * from left to right.  Then the internal nodes are indexed,
   * using a pre-order tree traversal.
   */
  private void indexNodes(TreeGraphNode tree) {
    indexNodes(tree, indexLeaves(tree, 1));
  }

  /**
   * Assign sequential integer indices to the leaves of the subtree
   * rooted at this TreeGraphNode, beginning with
   * startIndex, and traversing the leaves from left
   * to right. If node is already indexed, then it uses the existing index.
   *
   * @param startIndex index for this node
   * @return the next index still unassigned
   */
  private int indexLeaves(TreeGraphNode tree, int startIndex) {
    if (tree.isLeaf()) {
      int oldIndex = tree.index();
      if (oldIndex >= 0) {
        startIndex = oldIndex;
      } else {
        tree.setIndex(startIndex);
      }
      addNodeToIndexMap(startIndex, tree);
      startIndex++;
    } else {
      for (TreeGraphNode child : tree.children) {
        startIndex = indexLeaves(child, startIndex);
      }
    }
    return startIndex;
  }

  /**
   * Assign sequential integer indices to all nodes of the subtree
   * rooted at this TreeGraphNode, beginning with
   * startIndex, and doing a pre-order tree traversal.
   * Any node which already has an index will not be re-indexed
   * — this is so that we can index the leaves first, and
   * then index the rest.
   *
   * @param startIndex index for this node
   * @return the next index still unassigned
   */
  private int indexNodes(TreeGraphNode tree, int startIndex) {
    if (tree.index() < 0) {		// if this node has no index
      addNodeToIndexMap(startIndex, tree);
      tree.setIndex(startIndex++);
    }
    if (!tree.isLeaf()) {
      for (TreeGraphNode child : tree.children) {
        startIndex = indexNodes(child, startIndex);
      }
    }
    return startIndex;
  }

  /**
   * Store a mapping from an arbitrary integer index to a node in
   * this treegraph.  Normally a client shouldn't need to use this,
   * as the nodes are automatically indexed by the
   * TreeGraph constructor.
   *
   * @param index the arbitrary integer index
   * @param node  the TreeGraphNode to be indexed
   */
  private void addNodeToIndexMap(int index, TreeGraphNode node) {
    indexMap.put(Integer.valueOf(index), node);
  }


  /**
   * Return the node in the this treegraph corresponding to the
   * specified integer index.
   *
   * @param index the integer index of the node you want
   * @return the TreeGraphNode having the specified
   *         index (or null if such does not exist)
   */
  private TreeGraphNode getNodeByIndex(int index) {
    return indexMap.get(Integer.valueOf(index));
  }

  /**
   * Return the root Tree of this GrammaticalStructure.
   *
   * @return the root Tree of this GrammaticalStructure
   */
  public TreeGraphNode root() {
    return root;
  }

  private static void throwDepFormatException(String dep) {
     throw new RuntimeException(String.format("Dependencies should be for the format 'type(arg-idx, arg-idx)'. Could not parse '%s'", dep));
  }

  /**
   * Create a grammatical structure from its string representation.
   *
   * Like buildCoNLLXGrammaticalStructure,
   * this method fakes up the parts of the tree structure that are not
   * used by the grammatical relation transformation operations.
   *
   * Note: Added by daniel cer
   *
   * @param tokens
   * @param posTags
   * @param deps
   */
  public static GrammaticalStructure fromStringReps(List tokens, List posTags, List deps) {
    if (tokens.size() != posTags.size()) {
      throw new RuntimeException(String.format(
              "tokens.size(): %d != pos.size(): %d%n", tokens.size(), posTags
                      .size()));
    }

    List tgWordNodes = new ArrayList(tokens.size());
    List tgPOSNodes = new ArrayList(tokens.size());

    CoreLabel rootLabel = new CoreLabel();
    rootLabel.setValue("ROOT");
    List nodeWords = new ArrayList(tgPOSNodes.size() + 1);
    nodeWords.add(new IndexedWord(rootLabel));

    SemanticHeadFinder headFinder = new SemanticHeadFinder();

    Iterator posIter = posTags.iterator();
    for (String wordString : tokens) {
      String posString = posIter.next();
      CoreLabel wordLabel = new CoreLabel();
      wordLabel.setWord(wordString);
      wordLabel.setValue(wordString);
      wordLabel.setTag(posString);
      TreeGraphNode word = new TreeGraphNode(wordLabel);
      CoreLabel tagLabel = new CoreLabel();
      tagLabel.setValue(posString);
      tagLabel.setWord(posString);
      TreeGraphNode pos = new TreeGraphNode(tagLabel);
      tgWordNodes.add(word);
      tgPOSNodes.add(pos);
      TreeGraphNode[] childArr = {word};
      pos.setChildren(childArr);
      word.setParent(pos);
      pos.percolateHeads(headFinder);
      nodeWords.add(new IndexedWord(wordLabel));
    }

    TreeGraphNode root = new TreeGraphNode(rootLabel);

    root.setChildren(tgPOSNodes.toArray(new TreeGraphNode[tgPOSNodes.size()]));

    root.setIndex(0);

    // Build list of TypedDependencies
    List tdeps = new ArrayList(deps.size());

    for (String depString : deps) {
      int firstBracket = depString.indexOf('(');
      if (firstBracket == -1) throwDepFormatException(depString);


      String type = depString.substring(0, firstBracket);

      if (depString.charAt(depString.length() - 1) != ')') throwDepFormatException(depString);

      String args = depString.substring(firstBracket + 1, depString.length() - 1);

      int argSep = args.indexOf(", ");
      if (argSep == -1) throwDepFormatException(depString);

      String parentArg = args.substring(0, argSep);
      String childArg  = args.substring(argSep + 2);
      int parentDash = parentArg.lastIndexOf('-');
      if (parentDash == -1) throwDepFormatException(depString);
      int childDash = childArg.lastIndexOf('-');
      if (childDash == -1) throwDepFormatException(depString);
      //System.err.printf("parentArg: %s%n", parentArg);
      int parentIdx = Integer.parseInt(parentArg.substring(parentDash+1).replace("'", ""));

      int childIdx = Integer.parseInt(childArg.substring(childDash+1).replace("'", ""));

      GrammaticalRelation grel = new GrammaticalRelation(GrammaticalRelation.Language.Any, type, null, DEPENDENT);

      TypedDependency tdep = new TypedDependency(grel, nodeWords.get(parentIdx), nodeWords.get(childIdx));
      tdeps.add(tdep);
    }

    // TODO add some elegant way to construct language
    // appropriate GrammaticalStructures (e.g., English, Chinese, etc.)
    return new GrammaticalStructure(tdeps, root) {
      private static final long serialVersionUID = 1L;
    };
  }

  public GrammaticalStructure(List projectiveDependencies, TreeGraphNode root) {
    this.root = root;
    indexNodes(this.root);
    this.puncFilter = Filters.acceptFilter();
    allTypedDependencies = typedDependencies = new ArrayList(projectiveDependencies);
  }

  public GrammaticalStructure(Tree t, Collection relations,
                              HeadFinder hf, Predicate puncFilter) {
    this(t, relations, null, null, hf, puncFilter);
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(root.toPrettyString(0).substring(1));
    sb.append("Typed Dependencies:\n");
    sb.append(typedDependencies);
    return sb.toString();
  }

  private static void attachStrandedNodes(TreeGraphNode t, TreeGraphNode root, boolean attach, Predicate puncFilter, DirectedMultiGraph basicGraph) {
    if (t.isLeaf()) {
      return;
    }
    if (attach && puncFilter.test(t.headWordNode().label().value())) {
      // make faster by first looking for links from parent
      // it is necessary to look for paths using all directions
      // because sometimes there are edges created from lower nodes to
      // nodes higher up
      TreeGraphNode parent = t.parent().highestNodeWithSameHead();
      if (!basicGraph.isEdge(parent, t) && basicGraph.getShortestPath(root, t, false) == null) {
        basicGraph.add(parent, t, GrammaticalRelation.DEPENDENT);
      }
    }
    for (TreeGraphNode kid : t.children()) {
      attachStrandedNodes(kid, root, (kid.headWordNode() != t.headWordNode()), puncFilter, basicGraph);
    }
  }

  // cdm dec 2009: I changed this to automatically fail on preterminal nodes, since they shouldn't match for GR parent patterns.  Should speed it up.
  private static void analyzeNode(TreeGraphNode t, TreeGraphNode root, Collection relations, HeadFinder hf, Predicate puncFilter, DirectedMultiGraph basicGraph, DirectedMultiGraph completeGraph) {
    if (t.isPhrasal()) {    // don't do leaves or preterminals!
      TreeGraphNode tHigh = t.highestNodeWithSameHead();
      for (GrammaticalRelation egr : relations) {
        if (egr.isApplicable(t)) {
          for (TreeGraphNode u : egr.getRelatedNodes(t, root, hf)) {
            TreeGraphNode uHigh = u.highestNodeWithSameHead();
            if (uHigh == tHigh) {
              continue;
            }
            if (!puncFilter.test(uHigh.headWordNode().label().value())) {
              continue;
            }
            completeGraph.add(tHigh, uHigh, egr);
            // If there are two patterns that add dependencies, X --> Z and Y --> Z, and X dominates Y, then the dependency Y --> Z is not added to the basic graph to prevent unwanted duplication.
            // Similarly, if there is already a path from X --> Y, and an expression would trigger Y --> X somehow, we ignore that
            Set parents = basicGraph.getParents(uHigh);
            if ((parents == null || parents.size() == 0 || parents.contains(tHigh)) &&
                basicGraph.getShortestPath(uHigh, tHigh, true) == null) {
              // System.err.println("Adding " + egr.getShortName() + " from " + t + " to " + u + " tHigh=" + tHigh + "(" + tHigh.headWordNode() + ") uHigh=" + uHigh + "(" + uHigh.headWordNode() + ")");
              basicGraph.add(tHigh, uHigh, egr);
            }
          }
        }
      }
      // now recurse into children
      for (TreeGraphNode kid : t.children()) {
        analyzeNode(kid, root, relations, hf, puncFilter, basicGraph, completeGraph);
      }
    }
  }

  private void getExtraDeps(List deps, Predicate puncTypedDepFilter, DirectedMultiGraph completeGraph) {
    getExtras(deps);
    // adds stuff to basicDep based on the tregex patterns over the tree
    getTreeDeps(deps, completeGraph, puncTypedDepFilter, extraTreeDepFilter());
    Collections.sort(deps);
  }

  /**
   * Helps the constructor build a list of typed dependencies using
   * information from a {@code GrammaticalStructure}.
   */
  private List getDeps(Predicate puncTypedDepFilter, DirectedMultiGraph basicGraph) {
    List basicDep = Generics.newArrayList();

    for (TreeGraphNode gov : basicGraph.getAllVertices()) {
      for (TreeGraphNode dep : basicGraph.getChildren(gov)) {
        GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.headWordNode().label(), gov.label(), dep.headWordNode().label(), dep.label(), basicGraph.getEdges(gov, dep));
        // System.err.println("  Gov: " + gov + " Dep: " + dep + " Reln: " + reln);
        basicDep.add(new TypedDependency(reln, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label())));
      }
    }

    // add the root
    TreeGraphNode dependencyRoot = new TreeGraphNode(new Word("ROOT"));
    dependencyRoot.setIndex(0);
    TreeGraphNode rootDep = root().headWordNode();
    if (rootDep == null) {
      List leaves = Trees.leaves(root());
      if (leaves.size() > 0) {
        Tree leaf = leaves.get(0);
        if (!(leaf instanceof TreeGraphNode)) {
          throw new AssertionError("Leaves should be TreeGraphNodes");
        }
        rootDep = (TreeGraphNode) leaf;
        if (rootDep.headWordNode() != null) {
          rootDep = rootDep.headWordNode();
        }
      }
    }
    if (rootDep != null) {
      TypedDependency rootTypedDep = new TypedDependency(ROOT, new IndexedWord(dependencyRoot.label()), new IndexedWord(rootDep.label()));
      if (puncTypedDepFilter.test(rootTypedDep)) {
        basicDep.add(rootTypedDep);
      }
    }

    postProcessDependencies(basicDep);

    Collections.sort(basicDep);

    return basicDep;
  }

  /**
   * Returns a Filter which checks dependencies for usefulness as
   * extra tree-based dependencies.  By default, everything is
   * accepted.  One example of how this can be useful is in the
   * English dependencies, where the REL dependency is used as an
   * intermediate and we do not want this to be added when we make a
   * second pass over the trees for missing dependencies.
   */
  protected Predicate extraTreeDepFilter() {
    return Filters.acceptFilter();
  }

  /**
   * Post process the dependencies in whatever way this language
   * requires.  For example, English might replace "rel" dependencies
   * with either dobj or pobj depending on the surrounding
   * dependencies.
   */
  protected void postProcessDependencies(List basicDep) {
    // no post processing by default
  }

  /**
   * Get extra dependencies that do not depend on the tree structure,
   * but rather only depend on the existing dependency structure.
   * For example, the English xsubj dependency can be extracted that way.
   */
  protected void getExtras(List basicDep) {
    // no extra dependencies by default
  }


  /** Look through the tree t and adds to the List basicDep
   *  additional dependencies which aren't
   *  in the List but which satisfy the filter puncTypedDepFilter.
   *
   * @param deps The list of dependencies which may be augmented
   * @param completeGraph a graph of all the tree dependencies found earlier
   * @param puncTypedDepFilter The filter that may skip punctuation dependencies
   * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
   */
  private static void getTreeDeps(List deps,
                                  DirectedMultiGraph completeGraph,
                                  Predicate puncTypedDepFilter,
                                  Predicate extraTreeDepFilter) {
    for (TreeGraphNode gov : completeGraph.getAllVertices()) {
      for (TreeGraphNode dep : completeGraph.getChildren(gov)) {
        for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) {
          TypedDependency newDep = new TypedDependency(rel, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label()));
          if (!deps.contains(newDep) && puncTypedDepFilter.test(newDep) && extraTreeDepFilter.test(newDep)) {
            newDep.setExtra();
            deps.add(newDep);
          }
        }
      }
    }
  }

  private static class NoPunctFilter implements Predicate>, Serializable {
    private Predicate npf;

    NoPunctFilter(Predicate f) {
      this.npf = f;
    }

    @Override
    public boolean test(Dependency d) {
      if (d == null) {
        return false;
      }
      Label lab = d.dependent();
      if (lab == null) {
        return false;
      }
      return npf.test(lab.value());
    }

    // Automatically generated by Eclipse
    private static final long serialVersionUID = -2319891944796663180L;
  } // end static class NoPunctFilter


  private static class NoPunctTypedDependencyFilter implements Predicate, Serializable {
    private Predicate npf;

    NoPunctTypedDependencyFilter(Predicate f) {
      this.npf = f;
    }

    @Override
    public boolean test(TypedDependency d) {
      if (d == null) return false;

      IndexedWord l = d.dep();
      if (l == null) return false;

      return npf.test(l.value());
    }

    // Automatically generated by Eclipse
    private static final long serialVersionUID = -2872766864289207468L;
  } // end static class NoPunctTypedDependencyFilter


  /**
   * Get GrammaticalRelation between gov and dep, and null if gov  is not the
   * governor of dep
   */
  public GrammaticalRelation getGrammaticalRelation(int govIndex, int depIndex) {
    TreeGraphNode gov = getNodeByIndex(govIndex);
    TreeGraphNode dep = getNodeByIndex(depIndex);
    // TODO: this is pretty ugly
    return getGrammaticalRelation(new IndexedWord(gov.label()), new IndexedWord(dep.label()));
  }

  /**
   * Get GrammaticalRelation between gov and dep, and null if gov is not the
   * governor of dep
   */
  public GrammaticalRelation getGrammaticalRelation(IndexedWord gov, IndexedWord dep) {
    List labels = Generics.newArrayList();
    for (TypedDependency dependency : typedDependencies(Extras.MAXIMAL)) {
      if (dependency.gov().equals(gov) && dependency.dep().equals(dep)) {
        labels.add(dependency.reln());
      }
    }

    return getGrammaticalRelationCommonAncestor(gov, gov, dep, dep, labels);
  }

  /**
   * Returns the GrammaticalRelation which is the highest common
   * ancestor of the list of relations passed in.  The Labels are
   * passed in only for debugging reasons.  gov & dep are the
   * labels with the text, govH and depH can be higher labels in the
   * tree which represent the category
   */
  private static GrammaticalRelation getGrammaticalRelationCommonAncestor(AbstractCoreLabel gov, AbstractCoreLabel govH, AbstractCoreLabel dep, AbstractCoreLabel depH, List labels) {
    GrammaticalRelation reln = GrammaticalRelation.DEPENDENT;

    List sortedLabels;
    if (labels.size() <= 1) {
      sortedLabels = labels;
    } else {
      sortedLabels = new ArrayList(labels);
      Collections.sort(sortedLabels, new NameComparator());
    }
    // System.err.println(" gov " + govH + " dep " + depH + " arc labels: " + sortedLabels);

    for (GrammaticalRelation reln2 : sortedLabels) {
      if (reln.isAncestor(reln2)) {
        reln = reln2;
      } else if (PRINT_DEBUGGING && ! reln2.isAncestor(reln)) {
        System.err.println("@@@\t" + reln + "\t" + reln2 + "\t" +
                           govH.get(CoreAnnotations.ValueAnnotation.class) + "\t" + depH.get(CoreAnnotations.ValueAnnotation.class));
      }
    }
    if (PRINT_DEBUGGING && reln.equals(GrammaticalRelation.DEPENDENT)) {
      String topCat = govH.get(CoreAnnotations.ValueAnnotation.class);
      String topTag = gov.tag();
      String topWord = gov.value();
      String botCat = depH.get(CoreAnnotations.ValueAnnotation.class);
      String botTag = dep.tag();
      String botWord = dep.value();
      System.err.println("### dep\t" + topCat + "\t" + topTag + "\t" + topWord +
                         "\t" + botCat + "\t" + botTag + "\t" + botWord + "\t");
    }
    return reln;
  }

  private static List removeGrammaticalRelationAncestors(List original) {
    List filtered = Generics.newArrayList();
    for (GrammaticalRelation reln : original) {
      boolean descendantFound = false;
      for (int index = 0; index < filtered.size(); ++index) {
        GrammaticalRelation gr = filtered.get(index);
        //if the element in the list is an ancestor of the current
        //relation, remove it (we will replace it later)
        if (gr.isAncestor(reln)) {
          filtered.remove(index);
          --index;
        } else if (reln.isAncestor(gr)) {
          //if the relation is not an ancestor of an element in the
          //list, we add the relation
          descendantFound = true;
        }
      }
      if (!descendantFound) {
        filtered.add(reln);
      }
    }
    return filtered;
  }


  /**
   * Returns the typed dependencies of this grammatical structure.  These
   * are the basic word-level typed dependencies, where each word is dependent
   * on one other thing, either a word or the starting ROOT, and the
   * dependencies have a tree structure.  This corresponds to the
   * command-line option "basicDependencies".
   *
   * @return The typed dependencies of this grammatical structure
   */
  public Collection typedDependencies() {
    return typedDependencies(Extras.NONE);
  }


  /**
   * Returns all the typed dependencies of this grammatical structure.
   * These are like the basic (uncollapsed) dependencies, but may include
   * extra arcs for control relationships, etc. This corresponds to the
   * "nonCollapsed" option.
   */
  public Collection allTypedDependencies() {
    return typedDependencies(Extras.MAXIMAL);
  }


  /**
   * Returns the typed dependencies of this grammatical structure. These
   * are non-collapsed dependencies (basic or nonCollapsed).
   *
   * @param includeExtras If true, the list of typed dependencies
   * returned may include "extras", and does not follow a tree structure.
   * @return The typed dependencies of this grammatical structure
   */
  public List typedDependencies(Extras includeExtras) {
    List deps;
    // This copy has to be done because of the broken way
    // TypedDependency objects can be mutated by downstream methods
    // such as collapseDependencies.  Without the copy here it is
    // possible for two consecutive calls to
    // typedDependenciesCollapsed to get different results.  For
    // example, the English dependencies rename existing objects KILL
    // to note that they should be removed.
    if (includeExtras != Extras.NONE) {
      deps = new ArrayList(allTypedDependencies.size());
      for (TypedDependency dep : allTypedDependencies) {
        deps.add(new TypedDependency(dep));
      }
    } else {
      deps = new ArrayList(typedDependencies.size());
      for (TypedDependency dep : typedDependencies) {
        deps.add(new TypedDependency(dep));
      }
    }
    correctDependencies(deps);
    return deps;
  }

  /**
   * @see edu.stanford.nlp.trees.GrammaticalStructure#typedDependencies(edu.stanford.nlp.trees.GrammaticalStructure.Extras)
   */
  @Deprecated
  public List typedDependencies(boolean includeExtras) {
    return typedDependencies(includeExtras ? Extras.MAXIMAL : Extras.NONE);
  }

  /**
   * Get the typed dependencies after collapsing them.
   * Collapsing dependencies refers to turning certain function words
   * such as prepositions and conjunctions into arcs, so they disappear from
   * the set of nodes.
   * There is no guarantee that the dependencies are a tree. While the
   * dependencies are normally tree-like, the collapsing may introduce
   * not only re-entrancies but even small cycles.
   *
   * @return A set of collapsed dependencies
   */
  public Collection typedDependenciesCollapsed() {
    return typedDependenciesCollapsed(Extras.NONE);
  }

  // todo [cdm 2012]: The semantics of this method is the opposite of the others.
  // The other no argument methods correspond to includeExtras being
  // true, but for this one it is false.  This should probably be made uniform.
  /**
   * Get the typed dependencies after mostly collapsing them, but keep a tree
   * structure.  In order to do this, the code does:
   * 
   *  no relative clause processing
   * 
 no xsubj relations
   * 
 no propagation of conjuncts
   * 
   * This corresponds to the "tree" option.
   *
   * @return collapsed dependencies keeping a tree structure
   */
  public Collection typedDependenciesCollapsedTree() {
    List tdl = typedDependencies(Extras.NONE);
    collapseDependenciesTree(tdl);
    return tdl;
  }

  /**
   * Get the typed dependencies after collapsing them.
   * The "collapsed" option corresponds to calling this method with argument
   * {@code true}.
   *
   * @param includeExtras If true, the list of typed dependencies
   * returned may include "extras", like controlling subjects
   * @return collapsed dependencies
   */
  public List typedDependenciesCollapsed(Extras includeExtras) {
    List tdl = typedDependencies(includeExtras);
    collapseDependencies(tdl, false, includeExtras);
    return tdl;
  }

  /**
   * @see edu.stanford.nlp.trees.GrammaticalStructure#typedDependenciesCollapsed(edu.stanford.nlp.trees.GrammaticalStructure.Extras)
   */
  @Deprecated
  public List typedDependenciesCollapsed(boolean includeExtras) {
    return typedDependenciesCollapsed(includeExtras ? Extras.MAXIMAL : Extras.NONE);
  }


  /**
   * Get the typed dependencies after collapsing them and processing eventual
   * CC complements.  The effect of this part is to distributed conjoined
   * arguments across relations or conjoined predicates across their arguments.
   * This is generally useful, and we generally recommend using the output of
   * this method with the second argument being {@code true}.
   * The "CCPropagated" option corresponds to calling this method with an
   * argument of {@code true}.
   *
   * @param includeExtras If true, the list of typed dependencies
   * returned may include "extras", such as controlled subject links.
   * @return collapsed dependencies with CC processed
   */
  public List typedDependenciesCCprocessed(Extras includeExtras) {
    List tdl = typedDependencies(includeExtras);
    collapseDependencies(tdl, true, includeExtras);
    return tdl;
  }

  /**
   * @see edu.stanford.nlp.trees.GrammaticalStructure#typedDependenciesCCprocessed(edu.stanford.nlp.trees.GrammaticalStructure.Extras)
   */
  @Deprecated
  public List typedDependenciesCCprocessed(boolean includeExtras) {
    return typedDependenciesCCprocessed(includeExtras ? Extras.MAXIMAL : Extras.NONE);
  }


  /**
   * Get a list of the typed dependencies, including extras like control
   * dependencies, collapsing them and distributing relations across
   * coordination.  This method is generally recommended for best
   * representing the semantic and syntactic relations of a sentence. In
   * general it returns a directed graph (i.e., the output may not be a tree
   * and it may contain (small) cycles).
   * The "CCPropagated" option corresponds to calling this method.
   *
   * @return collapsed dependencies with CC processed
   */
  public List typedDependenciesCCprocessed() {
    return typedDependenciesCCprocessed(Extras.MAXIMAL);
  }


  /**
   * Destructively modify the Collection<TypedDependency> to collapse
   * language-dependent transitive dependencies.
   * 
   * Default is no-op; to be over-ridden in subclasses.
   *
   * @param list A list of dependencies to process for possible collapsing
   * @param CCprocess apply CC process?
   */
  protected void collapseDependencies(List list, boolean CCprocess, Extras includeExtras) {
    // do nothing as default operation
  }

  /**
   * Destructively modify the Collection<TypedDependency> to collapse
   * language-dependent transitive dependencies but keeping a tree structure.
   * 

   * Default is no-op; to be over-ridden in subclasses.
   *
   * @param list A list of dependencies to process for possible collapsing
   *
   */
  protected void collapseDependenciesTree(List list) {
    // do nothing as default operation
  }


  /**
   * Destructively modify the TypedDependencyGraph to correct
   * language-dependent dependencies. (e.g., nsubjpass in a relative clause)
   * 

   * Default is no-op; to be over-ridden in subclasses.
   *
   */
  protected void correctDependencies(Collection list) {
    // do nothing as default operation
  }


  /**
   * Checks if all the typeDependencies are connected
   * @param list a list of typedDependencies
   * @return true if the list represents a connected graph, false otherwise
   */
  public static boolean isConnected(Collection list) {
    return getRoots(list).size() <= 1; // there should be no more than one root to have a connected graph
                                         // there might be no root in the way we look when you have a relative clause
                                         // ex.: Apple is a society that sells computers
                                         // (the root "society" will also be the nsubj of "sells")
  }

  /**
   * Return a list of TypedDependencies which are not dependent on any node from the list.
   *
   * @param list The list of TypedDependencies to check
   * @return A list of TypedDependencies which are not dependent on any node from the list
   */
  public static Collection getRoots(Collection list) {

    Collection roots = new ArrayList();

    // need to see if more than one governor is not listed somewhere as a dependent
    // first take all the deps
    Collection deps = Generics.newHashSet();
    for (TypedDependency typedDep : list) {
      deps.add(typedDep.dep());
    }

    // go through the list and add typedDependency for which the gov is not a dep
    Collection govs = Generics.newHashSet();
    for (TypedDependency typedDep : list) {
      IndexedWord gov = typedDep.gov();
      if (!deps.contains(gov) && !govs.contains(gov)) {
        roots.add(typedDep);
      }
      govs.add(gov);
    }
    return roots;
  }

  private static final long serialVersionUID = 2286294455343892678L;

  private static class NameComparator implements Comparator {
    @Override
    public int compare(X o1, X o2) {
      String n1 = o1.toString();
      String n2 = o2.toString();
      return n1.compareTo(n2);
    }
  }


  public static final String DEFAULT_PARSER_FILE = "/u/nlp/data/lexparser/englishPCFG.ser.gz";

  /**
   * Print typed dependencies in either the Stanford dependency representation
   * or in the conllx format.
   *
   * @param deps
   *          Typed dependencies to print
   * @param tree
   *          Tree corresponding to typed dependencies (only necessary if conllx
   *          == true)
   * @param conllx
   *          If true use conllx format, otherwise use Stanford representation
   * @param extraSep
   *          If true, in the Stanford representation, the extra dependencies
   *          (which do not preserve the tree structure) are printed after the
   *          basic dependencies
   */
  public static void printDependencies(GrammaticalStructure gs, Collection deps, Tree tree, boolean conllx, boolean extraSep) {
    System.out.println(dependenciesToString(gs, deps, tree, conllx, extraSep));
  }

  public static String dependenciesToString(GrammaticalStructure gs, Collection deps, Tree tree, boolean conllx, boolean extraSep) {
    StringBuilder bf = new StringBuilder();

    Map indexToPos = Generics.newHashMap();
    indexToPos.put(0,0); // to deal with the special node "ROOT"
    List gsLeaves = gs.root.getLeaves();
    for (int i = 0; i < gsLeaves.size(); i++) {
      TreeGraphNode leaf = (TreeGraphNode) gsLeaves.get(i);
      indexToPos.put(leaf.label.index(), i + 1);
    }

    if (conllx) {

      List leaves = tree.getLeaves();
      Tree uposTree = UniversalPOSMapper.mapTree(tree);
      List uposLabels = uposTree.preTerminalYield();
      String[] words = new String[leaves.size()];
      String[] pos = new String[leaves.size()];
      String[] upos = new String[leaves.size()];

      String[] relns = new String[leaves.size()];
      int[] govs = new int[leaves.size()];

      int index = 0;
      for (Tree leaf : leaves) {
        index++;
        if (!indexToPos.containsKey(index)) {
          continue;
        }
        int depPos = indexToPos.get(index) - 1;
        words[depPos] = leaf.value();
        pos[depPos] = leaf.parent(tree).value(); // use slow, but safe, parent look up
        upos[depPos] = uposLabels.get(index - 1).value();
      }

      for (TypedDependency dep : deps) {
        int depPos = indexToPos.get(dep.dep().index()) - 1;
        govs[depPos] = indexToPos.get(dep.gov().index());
        relns[depPos] = dep.reln().toString();
      }

      for (int i = 0; i < relns.length; i++) {
        if (words[i] == null) {
          continue;
        }
        String out = String.format("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_\n", i + 1, words[i], upos[i], pos[i], govs[i], (relns[i] != null ? relns[i] : "erased"));
        bf.append(out);
      }

    } else {
      if (extraSep) {
        List extraDeps = new ArrayList();
        for (TypedDependency dep : deps) {
          if (dep.extra()) {
            extraDeps.add(dep);
          } else {
            bf.append(toStringIndex(dep, indexToPos));
            bf.append("\n");
          }
        }
        // now we print the separator for extra dependencies, and print these if
        // there are some
        if (!extraDeps.isEmpty()) {
          bf.append("======\n");
          for (TypedDependency dep : extraDeps) {
            bf.append(toStringIndex(dep, indexToPos));
            bf.append("\n");
          }
        }
      } else {
        for (TypedDependency dep : deps) {
          bf.append(toStringIndex(dep, indexToPos));
          bf.append("\n");
        }
      }
    }

    return bf.toString();
  }

  private static String toStringIndex(TypedDependency td, Map indexToPos) {
    IndexedWord gov = td.gov();
    IndexedWord dep = td.dep();
    return td.reln() + "(" + gov.value() + "-" + indexToPos.get(gov.index()) + gov.toPrimes() + ", " + dep.value() + "-" + indexToPos.get(dep.index()) + dep.toPrimes() + ")";
  }


  // Note that these field constants are 0-based whereas much documentation is 1-based

  public static final int CoNLLX_WordField = 1;
  public static final int CoNLLX_POSField = 3;
  public static final int CoNLLX_GovField = 6;
  public static final int CoNLLX_RelnField = 7;

  public static final int CoNLLX_FieldCount = 10;

  /**
   * Read in a file containing a CoNLL-X dependency treebank and return a
   * corresponding list of GrammaticalStructures.
   *
   * @throws IOException
   */
  public static List readCoNLLXGrammaticalStructureCollection(String fileName, Map shortNameToGRel, GrammaticalStructureFromDependenciesFactory factory) throws IOException {
    LineNumberReader reader = new LineNumberReader(IOUtils.readerFromString(fileName));
    List gsList = new LinkedList();

    List> tokenFields = new ArrayList>();

    for (String inline = reader.readLine(); inline != null;
         inline = reader.readLine()) {
      if ( ! inline.isEmpty()) {
        // read in a single sentence token by token
        List fields = Arrays.asList(inline.split("\t"));
        if (fields.size() != CoNLLX_FieldCount) {
          throw new RuntimeException(String.format("Error (line %d): 10 fields expected but %d are present", reader.getLineNumber(), fields.size()));
        }
        tokenFields.add(fields);
      } else {
        if (tokenFields.isEmpty())
          continue; // skip excess empty lines

        gsList.add(buildCoNLLXGrammaticalStructure(tokenFields, shortNameToGRel, factory));
        tokenFields = new ArrayList>();
      }
    }

    return gsList;
  }

  public static GrammaticalStructure
  buildCoNLLXGrammaticalStructure(List> tokenFields,
                                Map shortNameToGRel,
                                GrammaticalStructureFromDependenciesFactory factory) {
    List tgWords = new ArrayList(tokenFields.size());
    List tgPOSNodes = new ArrayList(tokenFields.size());

    SemanticHeadFinder headFinder = new SemanticHeadFinder();

    // Construct TreeGraphNodes for words and POS tags
    for (List fields : tokenFields) {
      CoreLabel word = new CoreLabel();
      word.setValue(fields.get(CoNLLX_WordField));
      word.setWord(fields.get(CoNLLX_WordField));
      word.setTag(fields.get(CoNLLX_POSField));
      word.setIndex(tgWords.size() + 1);
      CoreLabel pos = new CoreLabel();
      pos.setTag(fields.get(CoNLLX_POSField));
      pos.setValue(fields.get(CoNLLX_POSField));
      TreeGraphNode wordNode = new TreeGraphNode(word);
      TreeGraphNode posNode =new TreeGraphNode(pos);
      tgWords.add(new IndexedWord(word));
      tgPOSNodes.add(posNode);
      TreeGraphNode[] childArr = { wordNode };
      posNode.setChildren(childArr);
      wordNode.setParent(posNode);
      posNode.percolateHeads(headFinder);
    }

    // We fake up the parts of the tree structure that are not
    // actually used by the grammatical relation transformation
    // operations.
    //
    // That is, the constructed TreeGraphs consist of a flat tree,
    // without any phrase bracketing, but that does preserve the
    // parent child relationship between words and their POS tags.
    //
    // e.g. (ROOT (PRP I) (VBD hit) (DT the) (NN ball) (. .))

    TreeGraphNode root =
      new TreeGraphNode(new Word("ROOT-" + (tgPOSNodes.size() + 1)));
    root.setChildren(tgPOSNodes.toArray(new TreeGraphNode[tgPOSNodes.size()]));

    // Build list of TypedDependencies
    List tdeps = new ArrayList(tgWords.size());

    // Create a node outside the tree useful for root dependencies;
    // we want to keep those if they were stored in the conll file

    CoreLabel rootLabel = new CoreLabel();
    rootLabel.setValue("ROOT");
    rootLabel.setWord("ROOT");
    rootLabel.setIndex(0);
    IndexedWord dependencyRoot = new IndexedWord(rootLabel);
    for (int i = 0; i < tgWords.size(); i++) {
      String parentIdStr = tokenFields.get(i).get(CoNLLX_GovField);
      if (parentIdStr == null || parentIdStr.equals(""))
        continue;
      int parentId = Integer.parseInt(parentIdStr) - 1;
      String grelString = tokenFields.get(i).get(CoNLLX_RelnField);
      if (grelString.equals("null") || grelString.equals("erased"))
        continue;
      GrammaticalRelation grel = shortNameToGRel.get(grelString.toLowerCase());
      TypedDependency tdep;
      if (grel == null) {
        if (grelString.toLowerCase().equals("root")) {
          tdep = new TypedDependency(ROOT, dependencyRoot, tgWords.get(i));
        } else {
          throw new RuntimeException("Unknown grammatical relation '" +
                                     grelString + "' fields: " +
                                     tokenFields.get(i) + "\nNode: " +
                                     tgWords.get(i) + "\n" +
                                     "Known Grammatical relations: ["+shortNameToGRel.keySet()+"]" );
        }
      } else {
        if (parentId >= tgWords.size()) {
          System.err.printf("Warning: Invalid Parent Id %d Sentence Length: %d%n", parentId+1, tgWords.size());
          System.err.printf("         Assigning to root (0)%n");
          parentId = -1;
        }
        tdep = new TypedDependency(grel, (parentId == -1 ? dependencyRoot : tgWords.get(parentId)),
                                   tgWords.get(i));
      }
      tdeps.add(tdep);
    }
    return factory.build(tdeps, root);
  }


  private static String[] parseClassConstructArgs(String namePlusArgs) {
    String[] args = StringUtils.EMPTY_STRING_ARRAY;
    String name = namePlusArgs;
    if (namePlusArgs.matches(".*\\([^)]*\\)$")) {
      String argStr = namePlusArgs.replaceFirst("^.*\\(([^)]*)\\)$", "$1");
      args = argStr.split(",");
      name = namePlusArgs.replaceFirst("\\([^)]*\\)$", "");
    }
    String[] tokens = new String[1 + args.length];
    tokens[0] = name;
    System.arraycopy(args, 0, tokens, 1, args.length);
    return tokens;
  }


  private static DependencyReader loadAlternateDependencyReader(String altDepReaderName) {
    Class altDepReaderClass = null;
    String[] toks = parseClassConstructArgs(altDepReaderName);
    altDepReaderName = toks[0];
    String[] depReaderArgs = new String[toks.length - 1];
    System.arraycopy(toks, 1, depReaderArgs, 0, toks.length - 1);

    try {
      Class cl = Class.forName(altDepReaderName);
      altDepReaderClass = cl.asSubclass(DependencyReader.class);
    } catch (ClassNotFoundException e) {
      // have a second go below
    }
    if (altDepReaderClass == null) {
      try {
        Class cl = Class.forName("edu.stanford.nlp.trees." + altDepReaderName);
        altDepReaderClass = cl.asSubclass(DependencyReader.class);
      } catch (ClassNotFoundException e) {
          //
      }
    }
    if (altDepReaderClass == null) {
      System.err.println("Can't load dependency reader " + altDepReaderName + " or edu.stanford.nlp.trees." + altDepReaderName);
      return null;
    }

    DependencyReader altDepReader; // initialized below
    if (depReaderArgs.length == 0) {
      try {
        altDepReader = altDepReaderClass.newInstance();
      } catch (InstantiationException e) {
        throw new RuntimeException(e);
      } catch (IllegalAccessException e) {
        System.err.println("No argument constructor to " + altDepReaderName + " is not public");
        return null;
      }
    } else {
      try {
        altDepReader = altDepReaderClass.getConstructor(String[].class).newInstance((Object) depReaderArgs);
      } catch (IllegalArgumentException | SecurityException | InvocationTargetException e) {
        throw new RuntimeException(e);
      } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
      } catch (IllegalAccessException e) {
        System.err.println(depReaderArgs.length + " argument constructor to " + altDepReaderName + " is not public.");
        return null;
      } catch (NoSuchMethodException e) {
        System.err.println("String arguments constructor to " + altDepReaderName + " does not exist.");
        return null;
      }
    }
    return altDepReader;
  }


  private static DependencyPrinter loadAlternateDependencyPrinter(String altDepPrinterName) {
    Class altDepPrinterClass = null;
    String[] toks = parseClassConstructArgs(altDepPrinterName);
    altDepPrinterName = toks[0];
    String[] depPrintArgs = new String[toks.length - 1];
    System.arraycopy(toks, 1, depPrintArgs, 0, toks.length - 1);

    try {
      Class cl = Class.forName(altDepPrinterName);
      altDepPrinterClass = cl.asSubclass(DependencyPrinter.class);
    } catch (ClassNotFoundException e) {
      //
    }
    if (altDepPrinterClass == null) {
      try {
        Class cl = Class.forName("edu.stanford.nlp.trees." + altDepPrinterName);
        altDepPrinterClass = cl.asSubclass(DependencyPrinter.class);
      } catch (ClassNotFoundException e) {
        //
      }
    }
    if (altDepPrinterClass == null) {
      System.err.printf("Unable to load alternative printer %s or %s. Is your classpath set correctly?\n", altDepPrinterName, "edu.stanford.nlp.trees." + altDepPrinterName);
      return null;
    }
    try {
      DependencyPrinter depPrinter;
      if (depPrintArgs.length == 0) {
        depPrinter = altDepPrinterClass.newInstance();
      } else {
        depPrinter = altDepPrinterClass.getConstructor(String[].class).newInstance((Object) depPrintArgs);
      }
      return depPrinter;
    } catch (IllegalArgumentException e) {
      e.printStackTrace();
      return null;
    } catch (SecurityException e) {
      e.printStackTrace();
      return null;
    } catch (InstantiationException e) {
      e.printStackTrace();
      return null;
    } catch (IllegalAccessException e) {
      e.printStackTrace();
      return null;
    } catch (InvocationTargetException e) {
      e.printStackTrace();
      return null;
    } catch (NoSuchMethodException e) {
      if (depPrintArgs.length == 0) {
        System.err.printf("Can't find no-argument constructor %s().%n", altDepPrinterName);
      } else {
        System.err.printf("Can't find constructor %s(%s).%n", altDepPrinterName, Arrays.toString(depPrintArgs));
      }
      return null;
    }
  }

  private static Function, Tree> loadParser(String parserFile, String parserOptions, boolean makeCopulaHead) {
    if (parserFile == null || "".equals(parserFile)) {
      parserFile = DEFAULT_PARSER_FILE;
      if (parserOptions == null) {
        parserOptions = "-retainTmpSubcategories";
      }
    }
    if (parserOptions == null) {
      parserOptions = "";
    }
    if (makeCopulaHead) {
      parserOptions = "-makeCopulaHead " + parserOptions;
    }
    parserOptions = parserOptions.trim();
    // Load parser by reflection, so that this class doesn't require parser
    // for runtime use
    // LexicalizedParser lp = LexicalizedParser.loadModel(parserFile);
    // For example, the tregex package uses TreePrint, which uses
    // GrammaticalStructure, which would then import the
    // LexicalizedParser.  The tagger can read trees, which means it
    // would depend on tregex and therefore depend on the parser.
    Function, Tree> lp;
    try {
      Class[] classes = new Class[] { String.class, String[].class };
      Method method = Class.forName("edu.stanford.nlp.parser.lexparser.LexicalizedParser").getMethod("loadModel", classes);
      String[] opts = {};
      if (parserOptions.length() > 0) {
        opts = parserOptions.split(" +");
      }
      lp = (Function,Tree>) method.invoke(null, parserFile, opts);
    } catch (Exception cnfe) {
      throw new RuntimeException(cnfe);
    }
    return lp;
  }

  /**
   * Allow a collection of trees, that is a Treebank, appear to be a collection
   * of GrammaticalStructures.
   *
   * @author danielcer
   *
   */
  private static class TreeBankGrammaticalStructureWrapper implements Iterable {

    private final Iterable trees;
    private final boolean keepPunct;
    private final TreebankLangParserParams params;

    private final Map origTrees = new WeakHashMap();

    public TreeBankGrammaticalStructureWrapper(Iterable wrappedTrees, boolean keepPunct, TreebankLangParserParams params) {
      trees = wrappedTrees;
      this.keepPunct = keepPunct;
      this.params = params;
    }

    @Override
    public Iterator iterator() {
      return new GsIterator();
    }

    public Tree getOriginalTree(GrammaticalStructure gs) {
      return origTrees.get(gs);
    }


    private class GsIterator implements Iterator {

      private final Iterator tbIterator = trees.iterator();
      private final Predicate puncFilter;
      private final HeadFinder hf;
      private GrammaticalStructure next;

      public GsIterator() {
        // TODO: this is very english specific
        if (keepPunct) {
          puncFilter = Filters.acceptFilter();
        } else {
          puncFilter = new PennTreebankLanguagePack().punctuationWordRejectFilter();
        }
        hf = params.typedDependencyHeadFinder();
        primeGs();
      }

      private void primeGs() {
        GrammaticalStructure gs = null;
        while (gs == null && tbIterator.hasNext()) {
          Tree t = tbIterator.next();
          // System.err.println("GsIterator: Next tree is");
          // System.err.println(t);
          if (t == null) {
            continue;
          }
          try {
            gs = params.getGrammaticalStructure(t, puncFilter, hf);
            origTrees.put(gs, t);
            next = gs;
            // System.err.println("GsIterator: Next tree is");
            // System.err.println(t);
            return;
          } catch (NullPointerException npe) {
            System.err.println("Bung tree caused below dump. Continuing....");
            System.err.println(t);
            npe.printStackTrace();
          }
        }
        next = null;
      }

      @Override
      public boolean hasNext() {
        return next != null;
      }

      @Override
      public GrammaticalStructure next() {
        GrammaticalStructure ret = next;
        if (ret == null) {
          throw new NoSuchElementException();
        }
        primeGs();
        return ret;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }

    }
  } // end static class TreebankGrammaticalStructureWrapper


  /**
   * Given sentences or trees, output the typed dependencies.
   * 


   * By default, the method outputs the collapsed typed dependencies with
   * processing of conjuncts. The input can be given as plain text (one sentence
   * by line) using the option -sentFile, or as trees using the option
   * -treeFile. For -sentFile, the input has to be strictly one sentence per
   * line. You can specify where to find a parser with -parserFile
   * serializedParserPath. See LexicalizedParser for more flexible processing of
   * text files (including with Stanford Dependencies output). The above options
   * assume a file as input. You can also feed trees (only) via stdin by using
   * the option -filter.  If one does not specify a -parserFile, one
   * can specify which language pack to use with -tLPP, This option
   * specifies a class which determines which GrammaticalStructure to
   * use, which HeadFinder to use, etc.  It will default to
   * edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams,
   * but any TreebankLangParserParams can be specified.
   * 

   * If no method of producing trees is given other than to use the
   * LexicalizedParser, but no parser is specified, a default parser
   * is used, the English parser.  You can specify options to load
   * with the parser using the -parserOpts flag.  If the default
   * parser is used, and no options are provided, the option
   * -retainTmpSubcategories is used.
   * 

   * The following options can be used to specify the types of dependencies
   * wanted: 
   * 
   *  -collapsed collapsed dependencies
   * 
 -basic non-collapsed dependencies that preserve a tree structure
   * 
 -nonCollapsed non-collapsed dependencies that do not preserve a tree
   * structure (the basic dependencies plus the extra ones)
   * 
 -CCprocessed
   * collapsed dependencies and conjunctions processed (dependencies are added
   * for each conjunct) -- this is the default if no options are passed
   * 
 -collapsedTree collapsed dependencies retaining a tree structure
   * 
 -makeCopulaHead Contrary to the approach argued for in the SD papers,
   *  nevertheless make the verb 'to be' the head, not the predicate noun, adjective,
   *  etc. (However, when the verb 'to be' is used as an auxiliary verb, the main
   *  verb is still treated as the head.)
   * 
   * 
   * The {@code -conllx} option will output the dependencies in the CoNLL format,
   * instead of in the standard Stanford format (relation(governor,dependent))
   * and will retain punctuation by default.
   * When used in the "collapsed" format, words such as prepositions, conjunctions
   * which get collapsed into the grammatical relations and are not part of the
   * sentence per se anymore will be annotated with "erased" as grammatical relation
   * and attached to the fake "ROOT" node with index 0.
   * 

   * There is also an option to retain dependencies involving punctuation:
   * {@code -keepPunct}
   * 

   * The {@code -extraSep} option used with -nonCollapsed will print the basic
   * dependencies first, then a separator ======, and then the extra
   * dependencies that do not preserve the tree structure. The -test option is
   * used for debugging: it prints the grammatical structure, as well as the
   * basic, collapsed and CCprocessed dependencies. It also checks the
   * connectivity of the collapsed dependencies. If the collapsed dependencies
   * list doesn't constitute a connected graph, it prints the possible offending
   * nodes (one of them is the real root of the graph).
   * 

   * Using the -conllxFile, you can pass a file containing Stanford dependencies
   * in the CoNLL format (e.g., the basic dependencies), and obtain another
   * representation using one of the representation options.
   * 
   * Usage: 

   * java edu.stanford.nlp.trees.GrammaticalStructure [-treeFile FILE | -sentFile FILE | -conllxFile FILE | -filter] 

   * [-collapsed -basic -CCprocessed -test]
   *
   * @param args Command-line arguments, as above
   */
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {

    // System.out.print("GrammaticalRelations under DEPENDENT:");
    // System.out.println(DEPENDENT.toPrettyString());

    MemoryTreebank tb = new MemoryTreebank(new TreeNormalizer());
    Iterable trees = tb;

    Iterable gsBank = null;
    Properties props = StringUtils.argsToProperties(args);

    String encoding = props.getProperty("encoding", "utf-8");
    try {
      System.setOut(new PrintStream(System.out, true, encoding));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    String treeFileName = props.getProperty("treeFile");
    String sentFileName = props.getProperty("sentFile");
    String conllXFileName = props.getProperty("conllxFile");
    String altDepPrinterName = props.getProperty("altprinter");
    String altDepReaderName = props.getProperty("altreader");
    String altDepReaderFilename = props.getProperty("altreaderfile");

    String filter = props.getProperty("filter");

    boolean makeCopulaHead = props.getProperty("makeCopulaHead") != null;

    // TODO: if a parser is specified, load this from the parser
    // instead of ever loading it from this way
    String tLPP = props.getProperty("tLPP", "edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams");
    TreebankLangParserParams params = ReflectionLoading.loadByReflection(tLPP);
    if (makeCopulaHead) {
      // TODO: generalize and allow for more options
      String[] options = { "-makeCopulaHead" };
      params.setOptionFlag(options, 0);
    }

    if (sentFileName == null && (altDepReaderName == null || altDepReaderFilename == null) && treeFileName == null && conllXFileName == null && filter == null) {
      try {
        System.err.println("Usage: java GrammaticalStructure [options]* [-sentFile|-treeFile|-conllxFile file] [-testGraph]");
        System.err.println("  options: -basic, -collapsed, -CCprocessed [the default], -collapsedTree, -parseTree, -test, -parserFile file, -conllx, -keepPunct, -altprinter -altreader -altreaderfile");
        TreeReader tr = new PennTreeReader(new StringReader("((S (NP (NNP Sam)) (VP (VBD died) (NP-TMP (NN today)))))"));
        tb.add(tr.readTree());
      } catch (Exception e) {
        System.err.println("Horrible error: " + e);
        e.printStackTrace();
      }
    } else if (altDepReaderName != null && altDepReaderFilename != null) {
      DependencyReader altDepReader = loadAlternateDependencyReader(altDepReaderName);
      try {
        gsBank = altDepReader.readDependencies(altDepReaderFilename);
      } catch (IOException e) {
        System.err.println("Error reading " + altDepReaderFilename);
        return;
      }
    } else if (treeFileName != null) {
      tb.loadPath(treeFileName);
    } else if (filter != null) {
      tb.load(IOUtils.readerFromStdin());
    } else if (conllXFileName != null) {
      try {
        gsBank = params.readGrammaticalStructureFromFile(conllXFileName);
      } catch (RuntimeIOException e) {
        System.err.println("Error reading " + conllXFileName);
        return;
      }
    } else {
      String parserFile = props.getProperty("parserFile");
      String parserOpts = props.getProperty("parserOpts");
      boolean tokenized = props.getProperty("tokenized") != null;
      Function, Tree> lp = loadParser(parserFile, parserOpts, makeCopulaHead);
      trees = new LazyLoadTreesByParsing(sentFileName, encoding, tokenized, lp);

      // Instead of getting this directly from the LP, use reflection
      // so that a package which uses GrammaticalStructure doesn't
      // necessarily have to use LexicalizedParser
      try {
        Method method = lp.getClass().getMethod("getTLPParams");
        params = (TreebankLangParserParams) method.invoke(lp);
      } catch (Exception cnfe) {
        throw new RuntimeException(cnfe);
      }
    }

    // treats the output according to the options passed
    boolean basic = props.getProperty("basic") != null;
    boolean collapsed = props.getProperty("collapsed") != null;
    boolean CCprocessed = props.getProperty("CCprocessed") != null;
    boolean collapsedTree = props.getProperty("collapsedTree") != null;
    boolean nonCollapsed = props.getProperty("nonCollapsed") != null;
    boolean extraSep = props.getProperty("extraSep") != null;
    boolean parseTree = props.getProperty("parseTree") != null;
    boolean test = props.getProperty("test") != null;
    boolean keepPunct = props.getProperty("keepPunct") != null;
    boolean conllx = props.getProperty("conllx") != null;
    // todo: Support checkConnected on more options (including basic)
    boolean checkConnected = props.getProperty("checkConnected") != null;
    boolean portray = props.getProperty("portray") != null;

    // enforce keepPunct if conllx is turned on
    if(conllx) {
      keepPunct = true;
    }

    // If requested load alternative printer
    DependencyPrinter altDepPrinter = null;
    if (altDepPrinterName != null) {
      altDepPrinter = loadAlternateDependencyPrinter(altDepPrinterName);
    }

    // System.err.println("First tree in tb is");
    // System.err.println(((MemoryTreebank) tb).get(0));

    Method m = null;
    if (test) {
      // see if we can use SemanticGraph(Factory) to check for being a DAG
      // Do this by reflection to avoid this becoming a dependency when we distribute the parser
      try {
        Class sgf = Class.forName("edu.stanford.nlp.semgraph.SemanticGraphFactory");
        m = sgf.getDeclaredMethod("makeFromTree", GrammaticalStructure.class, boolean.class, boolean.class, boolean.class, boolean.class, boolean.class, boolean.class, Predicate.class, String.class, int.class);
      } catch (Exception e) {
        System.err.println("Test cannot check for cycles in tree format (classes not available)");
      }
    }

    if (gsBank == null) {
      gsBank = new TreeBankGrammaticalStructureWrapper(trees, keepPunct, params);
    }

    for (GrammaticalStructure gs : gsBank) {

      Tree tree;
      if (gsBank instanceof TreeBankGrammaticalStructureWrapper) {
        // System.err.println("Using TreeBankGrammaticalStructureWrapper branch");
        tree = ((TreeBankGrammaticalStructureWrapper) gsBank).getOriginalTree(gs);
        // System.err.println("Tree is: ");
        // System.err.println(t);
      } else {
        // System.err.println("Using gs.root() branch");
        tree = gs.root(); // recover tree
        // System.err.println("Tree from gs is");
        // System.err.println(t);
      }

      if (test) { // print the grammatical structure, the basic, collapsed and CCprocessed

        System.out.println("============= parse tree =======================");
        tree.pennPrint();
        System.out.println();

        System.out.println("------------- GrammaticalStructure -------------");
        System.out.println(gs);

        boolean allConnected = true;
        boolean connected;
        Collection bungRoots = null;
        System.out.println("------------- basic dependencies ---------------");
        List gsb = gs.typedDependencies(Extras.NONE);
        System.out.println(StringUtils.join(gsb, "\n"));
        connected = GrammaticalStructure.isConnected(gsb);
        if ( ! connected && bungRoots == null) {
          bungRoots = GrammaticalStructure.getRoots(gsb);
        }
        allConnected = connected && allConnected;

        System.out.println("------------- non-collapsed dependencies (basic + extra) ---------------");
        List gse = gs.typedDependencies(Extras.MAXIMAL);
        System.out.println(StringUtils.join(gse, "\n"));
        connected = GrammaticalStructure.isConnected(gse);
        if ( ! connected && bungRoots == null) {
          bungRoots = GrammaticalStructure.getRoots(gse);
        }
        allConnected = connected && allConnected;

        System.out.println("------------- collapsed dependencies -----------");
        System.out.println(StringUtils.join(gs.typedDependenciesCollapsed(Extras.MAXIMAL), "\n"));

        System.out.println("------------- collapsed dependencies tree -----------");
        System.out.println(StringUtils.join(gs.typedDependenciesCollapsedTree(), "\n"));

        System.out.println("------------- CCprocessed dependencies --------");
        List gscc = gs.typedDependenciesCollapsed(Extras.MAXIMAL);
        System.out.println(StringUtils.join(gscc, "\n"));

        System.out.println("-----------------------------------------------");
        // connectivity tests
        connected = GrammaticalStructure.isConnected(gscc);
        if ( ! connected && bungRoots == null) {
          bungRoots = GrammaticalStructure.getRoots(gscc);
        }
        allConnected = connected && allConnected;
        if (allConnected) {
          System.out.println("dependencies form connected graphs.");
        } else {
          System.out.println("dependency graph NOT connected! possible offending nodes: " + bungRoots);
        }

        // test for collapsed dependencies being a tree:
        // make sure at least it doesn't contain cycles (i.e., is a DAG)
        // Do this by reflection so parser doesn't need SemanticGraph and its
        // libraries
        if (m != null) {
          try {
            // the first arg is null because it's a static method....
            Object semGraph = m.invoke(null, gs, false, true, false, false, false, false, null, null, 0);
            Class sg = Class.forName("edu.stanford.nlp.semgraph.SemanticGraph");
            Method mDag = sg.getDeclaredMethod("isDag");
            boolean isDag = (Boolean) mDag.invoke(semGraph);

            System.out.println("tree dependencies form a DAG: " + isDag);
          } catch (Exception e) {
            e.printStackTrace();
          }
        }
      }// end of "test" output

      else {
        if (parseTree) {
          System.out.println("============= parse tree =======================");
          tree.pennPrint();
          System.out.println();
        }

        if (basic) {
          if (collapsed || CCprocessed || collapsedTree || nonCollapsed) {
            System.out.println("------------- basic dependencies ---------------");
          }
          if (altDepPrinter == null) {
            printDependencies(gs, gs.typedDependencies(Extras.NONE), tree, conllx, false);
          } else {
            System.out.println(altDepPrinter.dependenciesToString(gs, gs.typedDependencies(Extras.NONE), tree));
          }
        }

        if (nonCollapsed) {
          if (basic || CCprocessed || collapsed || collapsedTree) {
            System.out.println("----------- non-collapsed dependencies (basic + extra) -----------");
          }
          printDependencies(gs, gs.allTypedDependencies(), tree, conllx, extraSep);
        }

        if (collapsed) {
          if (basic || CCprocessed || collapsedTree || nonCollapsed) {
            System.out.println("----------- collapsed dependencies -----------");
          }
          printDependencies(gs, gs.typedDependenciesCollapsed(Extras.MAXIMAL), tree, conllx, false);
        }

        if (CCprocessed) {
          if (basic || collapsed || collapsedTree || nonCollapsed) {
            System.out.println("---------- CCprocessed dependencies ----------");
          }
          List deps = gs.typedDependenciesCCprocessed(Extras.MAXIMAL);
          if (checkConnected) {
            if (!GrammaticalStructure.isConnected(deps)) {
              System.err.println("Graph is not connected for:");
              System.err.println(tree);
              System.err.println("possible offending nodes: " + GrammaticalStructure.getRoots(deps));
            }
          }
          printDependencies(gs, deps, tree, conllx, false);
        }

        if (collapsedTree) {
          if (basic || CCprocessed || collapsed || nonCollapsed) {
            System.out.println("----------- collapsed dependencies tree -----------");
          }
          printDependencies(gs, gs.typedDependenciesCollapsedTree(), tree, conllx, false);
        }

        // default use: CCprocessed (to parallel what happens within the parser)
        if (!basic && !collapsed && !CCprocessed && !collapsedTree && !nonCollapsed) {
          // System.out.println("----------- CCprocessed dependencies -----------");
          printDependencies(gs, gs.typedDependenciesCCprocessed(Extras.MAXIMAL), tree, conllx, false);
        }
      }

      if (portray) {
        try {
          // put up a window showing it
          Class sgu = Class.forName("edu.stanford.nlp.semgraph.SemanticGraphUtils");
          Method mRender = sgu.getDeclaredMethod("render", GrammaticalStructure.class, String.class);
          // the first arg is null because it's a static method....
          mRender.invoke(null, gs, "Collapsed, CC processed deps");
        } catch (Exception e) {
          throw new RuntimeException("Couldn't use swing to portray semantic graph", e);
        }
      }

    } // end for
  } // end main

  // todo [cdm 2013]: Take this out and make it a trees class: TreeIterableByParsing
  static class LazyLoadTreesByParsing implements Iterable {
    final Reader reader;
    final String filename;
    final boolean tokenized;
    final String encoding;
    final Function, Tree> lp;

    public LazyLoadTreesByParsing(String filename, String encoding, boolean tokenized, Function, Tree> lp) {
      this.filename = filename;
      this.encoding = encoding;
      this.reader = null;
      this.tokenized = tokenized;
      this.lp = lp;
    }
    public LazyLoadTreesByParsing(Reader reader, boolean tokenized, Function, Tree> lp) {
      this.filename = null;
      this.encoding = null;
      this.reader = reader;
      this.tokenized = tokenized;
      this.lp = lp;
    }

    @Override
    public Iterator iterator() {
      final BufferedReader iReader;
      if (reader != null) {
        iReader = new BufferedReader(reader);
      } else {
        try {
          iReader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }

      return new Iterator() {

        String line = null;

        @Override
        public boolean hasNext() {
          if (line != null) {
            return true;
          } else {
            try {
              line = iReader.readLine();
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
            if (line == null) {
              try {
                if (reader == null) iReader.close();
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
              return false;
            }
            return true;
          }
        }

        @Override
        public Tree next() {
          if (line == null) {
            throw new NoSuchElementException();
          }
          Reader lineReader = new StringReader(line);
          line = null;
          List words;
          if (tokenized) {
            words = WhitespaceTokenizer.newWordWhitespaceTokenizer(lineReader).tokenize();
          } else {
            words = PTBTokenizer.newPTBTokenizer(lineReader).tokenize();
          }
          if (!words.isEmpty()) {
            // the parser throws an exception if told to parse an empty sentence.
            Tree parseTree = lp.apply(words);
            return parseTree;
          } else {
            return new SimpleTree();
          }
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException();
        }

      };
    }

  } // end static class LazyLoadTreesByParsing

}