// edu.stanford.nlp.trees.UniversalEnglishGrammaticalStructure Maven / Gradle / Ivy
//
// Go to download

package edu.stanford.nlp.trees;

import java.io.*;
import java.util.*;
import java.util.function.Predicate;

import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.process.Morphology;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.util.*;
import static edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations.*;
import static edu.stanford.nlp.trees.GrammaticalRelation.*;

/**
 * A GrammaticalStructure for Universal Dependencies English.
 * 
 * For feeding Stanford parser trees into this class, the Stanford parser should be run with the
 * "-retainNPTmpSubcategories" option for best results!
 *
 * @author Bill MacCartney
 * @author Marie-Catherine de Marneffe
 * @author Christopher Manning
 * @author Daniel Cer (CoNLLX format and alternative user selected dependency
 *         printer/reader interface)
 * @author John Bauer
 * @author Sebastian Schuster
 */
public class UniversalEnglishGrammaticalStructure extends GrammaticalStructure {

  /** Serialization version identifier of this class. */
  private static final long serialVersionUID = 1L;

  /**
   * Debug switch. It is {@code true} when the JVM system property
   * {@code UniversalEnglishGrammaticalStructure} is set to any value; the
   * processing methods of this class then print the dependencies to
   * {@code System.err} between steps.
   */
  private static final boolean DEBUG = System.getProperty("UniversalEnglishGrammaticalStructure", null) != null;

  /**
   * Construct a new {@code UniversalEnglishGrammaticalStructure} from an
   * existing parse tree. The new {@code GrammaticalStructure} has the same
   * tree structure and label values as the given tree (but no shared
   * storage). As part of construction, the parse tree is analyzed using
   * definitions from {@link GrammaticalRelation GrammaticalRelation} to
   * populate the new GrammaticalStructure with as many labeled grammatical
   * relations as it can.
   * <p>
   * Delegates to the two-argument constructor, passing the filter returned
   * by {@code new PennTreebankLanguagePack().punctuationWordRejectFilter()}.
   *
   * @param t Parse tree to make grammatical structure from
   */
  public UniversalEnglishGrammaticalStructure(Tree t) {
    this(t, new PennTreebankLanguagePack().punctuationWordRejectFilter());
  }

  /**
   * This gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
   * <p>
   * Delegates to the four-argument constructor with a new
   * {@code UniversalSemanticHeadFinder(true)} as head finder and
   * {@code threadSafe == true}.
   *
   * @param t Parse tree to make grammatical structure from
   * @param tagFilter Filter to remove punctuation dependencies
   */
  public UniversalEnglishGrammaticalStructure(Tree t, Predicate tagFilter) {
    this(t, tagFilter, new UniversalSemanticHeadFinder(true), true);
  }

  /**
   * This gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
   * <p>
   * Delegates to the four-argument constructor with
   * {@code threadSafe == true}.
   *
   * @param t Parse tree to make grammatical structure from
   * @param tagFilter Tag filter to remove punctuation dependencies
   * @param hf HeadFinder to use when building it
   */
  public UniversalEnglishGrammaticalStructure(Tree t, Predicate tagFilter, HeadFinder hf) {
    this(t, tagFilter, hf, true);
  }

  /**
   * Construct a new {@code GrammaticalStructure} from an existing parse
   * tree. The new {@code GrammaticalStructure} has the same tree structure
   * and label values as the given tree (but no shared storage). As part of
   * construction, the parse tree is analyzed using definitions from
   * {@link GrammaticalRelation GrammaticalRelation} to populate
   * the new {@code GrammaticalStructure} with as many labeled grammatical
   * relations as it can.
   * <p>
   * The superclass constructor receives the relation values and lock of
   * {@code UniversalEnglishGrammaticalRelations}, a
   * {@code CoordinationTransformer} built on {@code hf}, the head finder
   * itself, an accept-everything filter and {@code tagFilter}.
   *
   * @param t Parse tree to make grammatical structure from
   * @param tagFilter Filter for punctuation tags
   * @param hf HeadFinder to use when building it
   * @param threadSafe Whether or not to support simultaneous instances among multiple
   *          threads. NOTE(review): this argument is not read anywhere in
   *          this constructor body; confirm whether it is still meaningful.
   */
  public UniversalEnglishGrammaticalStructure(Tree t, Predicate tagFilter, HeadFinder hf, boolean threadSafe) {

    // the tree is normalized (for index and functional tag stripping) inside CoordinationTransformer
    super(t, UniversalEnglishGrammaticalRelations.values(), UniversalEnglishGrammaticalRelations.valuesLock(),
            new CoordinationTransformer(hf, true), hf, Filters.acceptFilter(), tagFilter);
  }

  /**
   * Used for postprocessing CoNLL X dependencies. Both arguments are handed
   * unchanged to the superclass constructor.
   *
   * @param projectiveDependencies dependencies to build the structure from
   * @param root root node passed on to the superclass
   */
  public UniversalEnglishGrammaticalStructure(List projectiveDependencies, TreeGraphNode root) {
    super(projectiveDependencies, root);
  }


  /**
   * Returns a Filter which checks dependencies for usefulness as
   * extra tree-based dependencies.  One example of how this can be
   * useful is in the English dependencies, where the REL dependency
   * is used as an intermediate and we do not want this to be added
   * when we make a second pass over the trees for missing dependencies.
   * <p>
   * This override returns the shared {@code ExtraTreeDepFilter} instance of
   * this class, which rejects {@code null} dependencies and dependencies
   * whose relation is {@code RELATIVE} or {@code PREPOSITION}.
   *
   * @return the filter for extra tree-based dependencies
   */
  @Override
  protected Predicate extraTreeDepFilter() {
    return extraTreeDepFilter;
  }

  /**
   * Filter for extra tree-based dependencies. A dependency is accepted
   * unless it is {@code null} or carries one of the intermediate relations
   * {@code RELATIVE} or {@code PREPOSITION}.
   * <p>
   * The type argument on {@code Predicate} is required: with the raw
   * interface, {@code test(TypedDependency)} would not override
   * {@code test(Object)}.
   */
  private static class ExtraTreeDepFilter implements Predicate<TypedDependency>, Serializable {
    /**
     * @param d dependency to check, may be {@code null}
     * @return {@code true} if {@code d} is non-null and its relation is
     *         neither {@code RELATIVE} nor {@code PREPOSITION}
     */
    @Override
    public boolean test(TypedDependency d) {
      return d != null && d.reln() != RELATIVE && d.reln() != PREPOSITION;
    }

    private static final long serialVersionUID = 1L;
  }

  /** Shared instance handed out by {@code extraTreeDepFilter()}. */
  private static final Predicate<TypedDependency> extraTreeDepFilter = new ExtraTreeDepFilter();


  /**
   * Intentionally empty override: this class adds nothing to {@code deps}
   * in this step and leaves all arguments untouched.
   */
  @Override
  protected void getTreeDeps(List deps,
      DirectedMultiGraph completeGraph,
      Predicate puncTypedDepFilter,
      Predicate extraTreeDepFilter) {
  // Deliberately does nothing.
  }

  /**
   * List-based entry point for dependency correction. The dependencies are
   * loaded into a {@code SemanticGraph}, corrected there, and then written
   * back into the same list in sorted order.
   *
   * @param list dependencies to correct; modified in place
   */
  @Override
  protected void correctDependencies(List list) {
    SemanticGraph graph = new SemanticGraph(list);
    correctDependencies(graph);

    /* Replace the contents of the caller's list with the corrected dependencies. */
    list.clear();
    list.addAll(graph.typedDependencies());
    Collections.sort(list);
  }

  /**
   * Applies the correction steps to a semantic graph in place: first
   * {@code correctSubjPass}, then {@code removeExactDuplicates}. When
   * {@code DEBUG} is set, the sorted dependencies are printed to stderr
   * before the first step and after each step.
   *
   * @param sg graph to correct; modified in place
   */
  protected static void correctDependencies(SemanticGraph sg) {
    if (DEBUG) {
      printListSorted("At correctDependencies:", sg.typedDependencies());
    }
    correctSubjPass(sg);
    if (DEBUG) {
      printListSorted("After correctSubjPass:", sg.typedDependencies());
    }
    removeExactDuplicates(sg);
    if (DEBUG) {
      printListSorted("After removeExactDuplicates:", sg.typedDependencies());
    }
  }

  /**
   * Debug helper: prints an optional title line followed by a sorted copy
   * of the given collection to {@code System.err}. The collection itself is
   * not modified.
   *
   * @param title heading to print first, or {@code null} for none
   * @param list elements to print in their natural order
   */
  private static void printListSorted(String title, Collection list) {
    /* Sort a private copy so the caller's collection keeps its order. */
    List sortedCopy = new ArrayList<>(list);
    sortedCopy.sort(null);

    if (title != null) {
      System.err.println(title);
    }
    System.err.println(sortedCopy);
  }

  /**
   * Post-processes the dependencies in place. The list is loaded into a
   * {@code SemanticGraph}, on which {@code correctWHAttachment} and then
   * {@code convertRel} are run; the resulting dependencies replace the
   * contents of the list. Unlike {@code correctDependencies(List)}, the
   * list is not sorted afterwards.
   *
   * @param list dependencies to post-process; modified in place
   */
  @Override
  protected void postProcessDependencies(List list) {
    SemanticGraph sg = new SemanticGraph(list);
    if (DEBUG) {
      printListSorted("At postProcessDependencies:", sg.typedDependencies());
    }
    /* Step 1: move WH-words to the embedded verb where needed. */
    correctWHAttachment(sg);
    if (DEBUG) {
      printListSorted("After corrrecting WH attachment:", sg.typedDependencies());
    }
    /* Step 2: replace the temporary "rel" and "prep" relations. */
    convertRel(sg);
    if (DEBUG) {
      printListSorted("After converting rel:", sg.typedDependencies());
    }
    list.clear();
    list.addAll(sg.typedDependencies());
  }

  /**
   * Adds the extra dependencies in place. The list is loaded into a
   * {@code SemanticGraph}, on which {@code addRef} and then
   * {@code addExtraNSubj} are run; the resulting dependencies replace the
   * contents of the list (without sorting).
   *
   * @param list dependencies to extend; modified in place
   */
  @Override
  protected void getExtras(List list) {
    SemanticGraph sg = new SemanticGraph(list);
    addRef(sg);
    if (DEBUG) {
      printListSorted("After adding ref:", sg.typedDependencies());
    }

    addExtraNSubj(sg);
    if (DEBUG) {
      printListSorted("After adding extra nsubj:", sg.typedDependencies());
    }

    list.clear();
    list.addAll(sg.typedDependencies());
  }


  /*
   * Semgrex patterns for prepositional phrases. They are compiled once and
   * never reassigned, so they are declared final like the other pattern
   * constants of this class.
   */
  /* A "by" case marker on an nmod of a governor that also has an auxpass dependent. */
  private static final SemgrexPattern PASSIVE_AGENT_PATTERN = SemgrexPattern.compile("{}=gov >nmod=reln ({}=mod >case {word:/^(?i:by)$/}=c1) >auxpass {}");
  /* Case marker c1 with two distinct mwe dependents c2 and c3 (three-word preposition). */
  private static final SemgrexPattern PREP_MW3_PATTERN = SemgrexPattern.compile("{}=gov   [>/^(nmod|advcl|acl)$/=reln ({}=mod >case ({}=c1 >mwe {}=c2 >mwe ({}=c3 !== {}=c2) ))]");
  /* Case marker c1 with one mwe dependent c2 (two-word preposition). */
  private static final SemgrexPattern PREP_MW2_PATTERN = SemgrexPattern.compile("{}=gov >/^(nmod|advcl|acl)$/=reln ({}=mod >case ({}=c1 >mwe {}=c2))");
  /* Any single case marker c1 on the modifier. */
  private static final SemgrexPattern PREP_PATTERN = SemgrexPattern.compile("{}=gov   >/^(nmod|advcl|acl)$/=reln ({}=mod >case {}=c1)");


  /**
   * Adds the case marker(s) to all nmod, acl and advcl relations that are
   * modified by one or more case markers(s).
   * <p>
   * Four stages run in a fixed order: passive agents ("by" with an auxpass),
   * three-word prepositions, two-word prepositions and single-word
   * prepositions. Each stage matches its pattern against a fresh soft copy
   * of the graph and applies the changes to {@code sg} itself.
   *
   * @param sg graph whose relations are renamed in place; nothing happens
   *          if it has no root
   * @see UniversalEnglishGrammaticalStructure#addCaseMarkersToReln
   */
  private static void addCaseMarkerInformation(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty())
      return;

    /* passive agent */
    SemanticGraph sgCopy = sg.makeSoftCopy();
    SemgrexMatcher matcher = PASSIVE_AGENT_PATTERN.matcher(sgCopy);
    while (matcher.find()) {
      IndexedWord caseMarker = matcher.getNode("c1");
      IndexedWord gov = matcher.getNode("gov");
      IndexedWord mod = matcher.getNode("mod");
      addPassiveAgentToReln(sg, gov, mod, caseMarker);
    }

    /*
     * Case markers handled by the most recent match. This list is shared by
     * all three stages below and is only compared with the directly
     * preceding handled match.
     */
    List oldCaseMarkers = Generics.newArrayList();


    /* 3-word prepositions */
    /*
     * The copy is taken again so the pattern sees the relations as renamed
     * by the previous stage (presumably so that already renamed relations no
     * longer match the bare nmod/advcl/acl names -- TODO confirm).
     */
    sgCopy = sg.makeSoftCopy();
    matcher = PREP_MW3_PATTERN.matcher(sgCopy);
    while (matcher.find()) {
      List caseMarkers = Generics.newArrayList(3);
      caseMarkers.add(matcher.getNode("c1"));
      caseMarkers.add(matcher.getNode("c2"));
      caseMarkers.add(matcher.getNode("c3"));

      /* Sorting makes the comparison below independent of which node was bound to c2 and which to c3. */
      Collections.sort(caseMarkers);

      /* We only want to match every case marker once. */
      if (caseMarkers.equals(oldCaseMarkers))
        continue;


      IndexedWord gov = matcher.getNode("gov");
      IndexedWord mod = matcher.getNode("mod");

      addCaseMarkersToReln(sg, gov, mod, caseMarkers);

      oldCaseMarkers = caseMarkers;
    }


    /* 2-word prepositions */
    sgCopy = sg.makeSoftCopy();
    matcher = PREP_MW2_PATTERN.matcher(sgCopy);
    while (matcher.find()) {
      List caseMarkers = Generics.newArrayList(2);
      caseMarkers.add(matcher.getNode("c1"));
      caseMarkers.add(matcher.getNode("c2"));
      Collections.sort(caseMarkers);

      /* We only want to match every case marker once. */
      if (caseMarkers.equals(oldCaseMarkers))
        continue;

      IndexedWord gov = matcher.getNode("gov");
      IndexedWord mod = matcher.getNode("mod");
      addCaseMarkersToReln(sg, gov, mod, caseMarkers);

      oldCaseMarkers = caseMarkers;
    }

    /* Single-word prepositions */
    sgCopy = sg.makeSoftCopy();
    matcher = PREP_PATTERN.matcher(sgCopy);
    while (matcher.find()) {
      List caseMarkers = Generics.newArrayList(1);
      caseMarkers.add(matcher.getNode("c1"));

      /* Same de-duplication as above; a single marker needs no sorting. */
      if (caseMarkers.equals(oldCaseMarkers))
        continue;

      IndexedWord gov = matcher.getNode("gov");
      IndexedWord mod = matcher.getNode("mod");
      addCaseMarkersToReln(sg, gov, mod, caseMarkers);

      oldCaseMarkers = caseMarkers;
    }

  }


  /**
   * Relabels the edge from {@code gov} to {@code mod} with the nmod
   * relation obtained for the name "agent".
   *
   * @param sg graph containing the edge
   * @param gov governor of the nmod relation
   * @param mod modifier of the nmod relation
   * @param caseMarker the matched case marker; not used by this method
   */
  private static void addPassiveAgentToReln(SemanticGraph sg,
      IndexedWord gov, IndexedWord mod, IndexedWord caseMarker) {

    final SemanticGraphEdge agentEdge = sg.getEdge(gov, mod);
    agentEdge.setRelation(UniversalEnglishGrammaticalRelations.getNmod("agent"));
  }


  /**
   * Appends case marker information to nmod/acl/advcl relations.
   * <p>
   * E.g. if there is a relation nmod(gov, dep) and case(dep, prep), then
   * the nmod relation is renamed to nmod:prep. Several adjacent case
   * markers are joined with underscores (nmod:prep1_prep2). The relation
   * name is lower-cased with {@code Locale.ROOT} so that the result does not
   * depend on the default locale of the JVM.
   * <p>
   * If the graph contains no edge from {@code gov} to {@code mod}, the
   * method does nothing.
   *
   * @param sg semantic graph
   * @param gov governor of the nmod/acl/advcl relation
   * @param mod modifier of the nmod/acl/advcl relation
   * @param caseMarkers List of all the case markers that depend on mod
   */
  private static void addCaseMarkersToReln(SemanticGraph sg, IndexedWord gov, IndexedWord mod, List<IndexedWord> caseMarkers) {

    SemanticGraphEdge edge = sg.getEdge(gov, mod);
    if (edge == null) {
      /* No relation to rename. */
      return;
    }

    int lastCaseMarkerIndex = 0;
    StringBuilder sb = new StringBuilder();
    boolean firstWord = true;
    for (IndexedWord cm : caseMarkers) {
      /* check for adjacency */
      if (lastCaseMarkerIndex == 0 || cm.index() == (lastCaseMarkerIndex + 1)) {
        if ( ! firstWord) {
          sb.append("_");
        }
        sb.append(cm.value());
        firstWord = false;
      } else {
        /* Should never happen as there should be never two non-adjacent case markers.
         * If it does happen nevertheless create an additional relation.
         */
        GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(Locale.ROOT));
        sg.addEdge(gov, mod, reln, Double.NEGATIVE_INFINITY, true);
        /* Start a new name with the current marker. The builder already
         * holds a word now, so a following adjacent marker must be joined
         * with an underscore.
         */
        sb = new StringBuilder(cm.value());
        firstWord = false;
      }
      lastCaseMarkerIndex = cm.index();
    }
    GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), sb.toString().toLowerCase(Locale.ROOT));
    edge.setRelation(reln);
  }

  private static final SemgrexPattern PREP_CONJP_PATTERN = SemgrexPattern.compile("{} >case ({}=gov >cc {}=cc >conj {}=conj)");

  /**
   * Expands prepositions with conjunctions such as in the sentence
   * "Bill flies to and from Serbia." by copying the verb resulting
   * in the following relations:
   * <pre>
   * conj:and(flies, flies')
   * case(Serbia, to)
   * cc(to, and)
   * conj(to, from)
   * nmod(flies, Serbia)
   * nmod(flies', Serbia)
   * </pre>
   * The label of the conjunct relation includes the conjunction type
   * because if the verb has multiple cc relations then it can be impossible
   * to infer which coordination marker belongs to which conjuncts.
   * <p>
   * Consecutive matches that share the same first preposition and the same
   * coordination marker are collected and handed to
   * {@code expandPrepConjunction} as one group.
   *
   * @param sg A SemanticGraph for a sentence
   */
  private static void expandPrepConjunctions(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    /* Matching runs on a soft copy; the changes are applied to sg itself. */
    SemgrexMatcher prepMatcher = PREP_CONJP_PATTERN.matcher(sg.makeSoftCopy());

    IndexedWord groupGov = null;
    IndexedWord groupCc = null;
    List groupConjuncts = Generics.newLinkedList();

    while (prepMatcher.find()) {
      IndexedWord cc = prepMatcher.getNode("cc");
      IndexedWord conjunct = prepMatcher.getNode("conj");
      IndexedWord firstPrep = prepMatcher.getNode("gov");

      /* A match continues the current group if governor and cc are unchanged. */
      boolean continuesGroup = groupGov == null
          || (firstPrep.equals(groupGov) && cc.equals(groupCc));
      if ( ! continuesGroup) {
        expandPrepConjunction(sg, groupGov, groupConjuncts, groupCc);
        groupConjuncts = Generics.newLinkedList();
      }

      groupCc = cc;
      groupGov = firstPrep;
      groupConjuncts.add(conjunct);
    }

    /* Flush the last group, if there was any match at all. */
    if (groupGov != null) {
      expandPrepConjunction(sg, groupGov, groupConjuncts, groupCc);
    }

  }

  /*
   * Used by expandPrepConjunctions.
   *
   * gov is the first preposition of the coordination, conjDeps are the
   * prepositions conjoined with it and ccDep is the coordination marker.
   * For every conjoined preposition a soft copy of the governor of the
   * prepositional phrase is created. The copy is attached to the original
   * governor as a conjunct and receives its own relation to the head of the
   * phrase, which is then labelled with the conjoined preposition.
   * Nothing is changed if gov has no parent or grandparent in the graph.
   */
  private static void expandPrepConjunction(SemanticGraph sg, IndexedWord gov,
      List<IndexedWord> conjDeps, IndexedWord ccDep)  {

    /* Head of the prepositional phrase, i.e. the word that the preposition marks. */
    IndexedWord caseGov = sg.getParent(gov);

    if (caseGov == null) {
      return;
    }

    /* Word that governs the prepositional phrase. */
    IndexedWord caseGovGov = sg.getParent(caseGov);

    if (caseGovGov == null) {
      return;
    }

    /* If the governor is itself a copy, attach the new conjuncts to its original. */
    IndexedWord conjGov = caseGovGov.getOriginal() != null ? caseGovGov.getOriginal() : caseGovGov;
    GrammaticalRelation rel = sg.reln(caseGovGov, caseGov);
    List<IndexedWord> newConjDeps = Generics.newLinkedList();
    for (IndexedWord conjDep : conjDeps) {
      IndexedWord caseGovGovCopy = caseGovGov.makeSoftCopy();

      /* Add the conjunct relation to the copy node. */
      sg.addEdge(conjGov, caseGovGovCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
      newConjDeps.add(caseGovGovCopy);

      /* Give the copy its own relation to the head of the phrase ... */
      sg.addEdge(caseGovGovCopy, caseGov, rel, Double.NEGATIVE_INFINITY, true);

      /* ... and label that relation with the conjoined preposition. */
      List<IndexedWord> caseMarkers = Generics.newArrayList();
      caseMarkers.add(conjDep);

      addCaseMarkersToReln(sg, caseGovGovCopy, caseGov, caseMarkers);
    }

    /* Add conjunction information for these relations already at this point.
     * It could be that we add several coordinating conjunctions while collapsing
     * and we might not know which conjunction belongs to which conjunct at a later
     * point.
     */
    addConjToReln(sg, conjGov, newConjDeps, ccDep);
  }


  /* Compiled once and never reassigned, hence final like the other pattern constants. */
  private static final SemgrexPattern PP_CONJP_PATTERN = SemgrexPattern.compile("{} >/^(nmod|acl|advcl)$/ (({}=gov >case {}) >cc {}=cc >conj ({}=conj >case {}))");


  /**
   * Expands PPs with conjunctions such as in the sentence
   * "Bill flies to France and from Serbia." by copying the verb
   * that governs the prepositional phrase resulting in the following
   * relations:
   * <pre>
   * conj:and(flies, flies')
   * case(France, to)
   * cc(flies, and)
   * case(Serbia, from)
   * nmod(flies, France)
   * nmod(flies', Serbia)
   * </pre>
   * The label of the conjunct relation includes the conjunction type
   * because if the verb has multiple cc relations then it can be impossible
   * to infer which coordination marker belongs to which conjuncts.
   * <p>
   * Consecutive matches that share the same governor and the same
   * coordination marker are collected and handed to
   * {@code expandPPConjunction} as one group.
   *
   * @param sg SemanticGraph to operate on.
   */
  private static void expandPPConjunctions(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    /* Matching runs on a soft copy; the changes are applied to sg itself. */
    SemanticGraph sgCopy = sg.makeSoftCopy();
    SemgrexMatcher matcher = PP_CONJP_PATTERN.matcher(sgCopy);

    IndexedWord oldGov = null;
    IndexedWord oldCcDep = null;
    List conjDeps = Generics.newLinkedList();

    while (matcher.find()) {
      IndexedWord conjDep = matcher.getNode("conj");
      IndexedWord gov = matcher.getNode("gov");
      IndexedWord ccDep = matcher.getNode("cc");

      /* Governor or coordination marker changed: the previous group is complete. */
      if (oldGov != null &&  (! gov.equals(oldGov) || ! ccDep.equals(oldCcDep))) {
        expandPPConjunction(sg, oldGov, conjDeps, oldCcDep);
        conjDeps = Generics.newLinkedList();
      }
      oldCcDep = ccDep;
      oldGov = gov;
      conjDeps.add(conjDep);
    }

    /* Flush the last group, if there was any match at all. */
    if (oldGov != null) {
      expandPPConjunction(sg, oldGov, conjDeps, oldCcDep);
    }

  }

  /*
   * Used by expandPPConjunctions.
   *
   * gov is the head of the first prepositional phrase, conjDeps are the
   * heads of the phrases conjoined with it and ccDep is the coordination
   * marker. For every conjoined phrase a soft copy of the governor of gov is
   * created and attached to the original governor as a conjunct; an existing
   * conj edge from gov to the conjoined phrase is replaced by the relation
   * of the first phrase, now starting at the copy. Nothing is changed if gov
   * has no parent in the graph.
   */
  private static void expandPPConjunction(SemanticGraph sg, IndexedWord gov,
      List<IndexedWord> conjDeps, IndexedWord ccDep) {

    IndexedWord nmodGov = sg.getParent(gov);

    if (nmodGov == null) {
      return;
    }

    /* If the governor is itself a copy, attach the new conjuncts to its original. */
    IndexedWord conjGov = nmodGov.getOriginal() != null ? nmodGov.getOriginal() : nmodGov;
    GrammaticalRelation rel = sg.reln(nmodGov, gov);
    List<IndexedWord> newConjDeps = Generics.newLinkedList();
    for (IndexedWord conjDep : conjDeps) {
      IndexedWord nmodGovCopy = nmodGov.makeSoftCopy();

      /* Change conj(nmod-1, nmod-2) to nmod(nmod-1-gov, nmod-2) */
      SemanticGraphEdge edge = sg.getEdge(gov, conjDep);
      if (edge != null) {
        sg.removeEdge(edge);
        sg.addEdge(nmodGovCopy, conjDep, rel, Double.NEGATIVE_INFINITY, false);
      }

      /* Add relation to copy node. */
      sg.addEdge(conjGov, nmodGovCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
      newConjDeps.add(nmodGovCopy);
    }

    /* Attach CC node to conjGov */
    SemanticGraphEdge edge = sg.getEdge(gov, ccDep);
    if (edge != null) {
      sg.removeEdge(edge);
      sg.addEdge(conjGov, ccDep, COORDINATION, Double.NEGATIVE_INFINITY, false);
    }

    /* Add conjunction information for these relations already at this point.
     * It could be that we add several coordinating conjunctions while collapsing
     * and we might not know which conjunction belongs to which conjunct at a later
     * point.
     */
    addConjToReln(sg, conjGov, newConjDeps, ccDep);
  }


  /**
   * Returns a GrammaticalRelation which combines the original relation and
   * the preposition.
   * <p>
   * If the given relation has a specific part, its parent relation decides
   * which kind of relation is built. Only nominal modifiers, adverbial
   * clause modifiers and clausal modifiers are combined with the name; any
   * other relation is returned unchanged.
   *
   * @param reln relation to specialise
   * @param relationName name to combine with the relation
   * @return the combined relation, or {@code reln} itself if it is none of
   *         the three supported kinds
   */
  private static GrammaticalRelation getCaseMarkedRelation(GrammaticalRelation reln, String relationName) {
    /* Strip an existing specific part by moving up to the parent relation. */
    final GrammaticalRelation base = (reln.getSpecific() != null) ? reln.getParent() : reln;

    if (base == NOMINAL_MODIFIER) {
      return UniversalEnglishGrammaticalRelations.getNmod(relationName);
    }
    if (base == ADV_CLAUSE_MODIFIER) {
      return UniversalEnglishGrammaticalRelations.getAdvcl(relationName);
    }
    if (base == CLAUSAL_MODIFIER) {
      return UniversalEnglishGrammaticalRelations.getAcl(relationName);
    }
    return reln;
  }


  private static final SemgrexPattern CONJUNCTION_PATTERN = SemgrexPattern.compile("{}=gov >cc {}=cc >conj {}=conj");


  /**
   * Adds the type of conjunction to all conjunct relations.
   * <p>
   * cc(Marie, and), conj(Marie, Chris) and conj(Marie, John)
   * become cc(Marie, and), conj:and(Marie, Chris) and conj:and(Marie, John).
   * <p>
   * In case multiple coordination marker depend on the same governor
   * the one that precedes the conjunct is appended to the conjunction relation or the
   * first one if no preceding marker exists.
   * <p>
   * Some multi-word coordination markers are collapsed to conj:and or conj:negcc.
   * See {@link #conjValue(IndexedWord, SemanticGraph)}.
   *
   * @param sg A SemanticGraph from a sentence
   */
  private static void addConjInformation(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    /* Matching runs on a soft copy; the relations are renamed in sg itself. */
    SemgrexMatcher conjMatcher = CONJUNCTION_PATTERN.matcher(sg.makeSoftCopy());

    IndexedWord groupGov = null;
    IndexedWord groupCc = null;
    List groupConjuncts = Generics.newLinkedList();

    while (conjMatcher.find()) {
      IndexedWord conjunct = conjMatcher.getNode("conj");
      IndexedWord governor = conjMatcher.getNode("gov");
      IndexedWord cc = conjMatcher.getNode("cc");

      /* A match continues the current group if governor and cc are unchanged. */
      boolean continuesGroup = groupGov == null
          || (governor.equals(groupGov) && cc.equals(groupCc));
      if ( ! continuesGroup) {
        addConjToReln(sg, groupGov, groupConjuncts, groupCc);
        groupConjuncts = Generics.newLinkedList();
      }

      groupCc = cc;
      groupConjuncts.add(conjunct);
      groupGov = governor;
    }

    /* Flush the last group, if there was any match at all. */
    if (groupGov != null) {
      addConjToReln(sg, groupGov, groupConjuncts, groupCc);
    }

  }

  /*
   * Used by addConjInformation.
   *
   * Renames the relation between gov and each of the given conjuncts to the
   * conjunction-specific relation computed by conjValue for ccDep. A
   * relation is renamed if it is still the plain CONJUNCT relation or if
   * the conjunct follows the coordination marker in the sentence.
   * Conjuncts without an edge from gov are skipped.
   */
  private static void addConjToReln(SemanticGraph sg,
      IndexedWord gov, List<IndexedWord> conjDeps, IndexedWord ccDep) {

    for (IndexedWord conjDep : conjDeps) {
      SemanticGraphEdge edge = sg.getEdge(gov, conjDep);
      if (edge == null) {
        /* No relation to rename for this conjunct. */
        continue;
      }
      if (edge.getRelation() == CONJUNCT || conjDep.index() > ccDep.index()) {
        edge.setRelation(conjValue(ccDep, sg));
      }
    }
  }

  /* Used by correctWHAttachment */
  private static final SemgrexPattern XCOMP_PATTERN = SemgrexPattern.compile("{}=root >xcomp {}=embedded >/^(dep|dobj)$/ {}=wh ?>/([di]obj)/ {}=obj");

  private static final Morphology morphology = new Morphology();

  /**
   * Tries to correct complicated cases of WH-movement in
   * sentences such as "What does Mary seem to have?" in
   * which "What" should attach to "have" instead of the
   * control verb.
   * <p>
   * A dep/dobj dependent of the control verb whose tag starts with "W" is
   * moved to the embedded verb as its direct object if the control verb has
   * a further object, or if the lemma of the control verb matches
   * {@code EnglishPatterns.NP_V_S_INF_VERBS_REGEX}.
   *
   * @param sg The Semantic graph to operate on.
   */
  private static void correctWHAttachment(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    /* Matching runs on a soft copy; edges are moved in sg itself. */
    SemgrexMatcher xcompMatcher = XCOMP_PATTERN.matcher(sg.makeSoftCopy());
    while (xcompMatcher.findNextMatchingNode()) {
      IndexedWord controlVerb = xcompMatcher.getNode("root");
      IndexedWord embeddedVerb = xcompMatcher.getNode("embedded");
      IndexedWord whWord = xcompMatcher.getNode("wh");
      IndexedWord otherObject = xcompMatcher.getNode("obj");

      /* Only WH-words are candidates for reattachment. */
      if ( ! whWord.tag().startsWith("W")) {
        continue;
      }

      /* Reattach if the control verb already has an object, or if it is a
         verb that can't have an object. The lemma is only computed when no
         other object was matched. */
      boolean reattach = otherObject != null
          || morphology.lemma(controlVerb.value(), controlVerb.tag()).matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX);
      if ( ! reattach) {
        continue;
      }

      SemanticGraphEdge whEdge = sg.getEdge(controlVerb, whWord);
      if (whEdge != null) {
        sg.removeEdge(whEdge);
        sg.addEdge(embeddedVerb, whWord, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
      }
    }
  }


  /**
   * What we do in this method is look for temporary dependencies of
   * the type "rel" and "prep".  These occur in sentences such as "I saw the man
   * who you love".  In that case, we should produce dobj(love, who).
   * On the other hand, in the sentence "... which Mr. Bush was
   * fighting for", we should have case(which, for).
   * <p>
   * For every "prep" edge, the outgoing edges of its governor are searched
   * for an nmod or "rel" edge whose dependent does not come after the
   * preposition. If one is found, the "prep" edge is replaced by a case edge
   * from that dependent to the preposition, and a "rel" edge is relabelled
   * nmod. Otherwise the "prep" edge itself is relabelled nmod. Finally all
   * remaining "rel" edges are relabelled dobj.
   *
   * @param sg graph to convert; modified in place
   */
   private static void convertRel(SemanticGraph sg) {

    for (SemanticGraphEdge prep : sg.findAllRelns(PREPOSITION)) {

      /* Set once the prep edge has been replaced by a case edge. */
      boolean changedPrep = false;

      for (SemanticGraphEdge nmod : sg.outgoingEdgeIterable(prep.getGovernor())) {

        // todo: It would also be good to add a rule here to prefer ccomp nsubj over dobj if there is a ccomp with no subj
        // then we could get right: Which eco-friendly options do you think there will be on the new Lexus?
        if (nmod.getRelation() != NOMINAL_MODIFIER && nmod.getRelation() != RELATIVE) {
          continue;
        }

        /* Skip candidates that come after the preposition in the sentence. */
        if (prep.getDependent().index() < nmod.getDependent().index()) {
          continue;
        }

        /* The graph is modified while its edges are being iterated; the
           break below leaves the loop right after the modification. */
        sg.removeEdge(prep);
        sg.addEdge(nmod.getDependent(), prep.getDependent(), CASE_MARKER, Double.NEGATIVE_INFINITY, false);

        changedPrep = true;

        if (nmod.getRelation() == RELATIVE) {
          nmod.setRelation(NOMINAL_MODIFIER);
        }

        break;
      }

      /* No suitable sibling found: the preposition becomes a nominal modifier itself. */
      if ( ! changedPrep) {
        prep.setRelation(NOMINAL_MODIFIER);
      }
    }

    /* Rename remaining "rel" relations. */
    for (SemanticGraphEdge edge : sg.findAllRelns(RELATIVE)) {
      edge.setRelation(DIRECT_OBJECT);
    }
  }

  /**
   * Destructively modifies the given list of dependencies
   * by collapsing several types of transitive pairs of dependencies or
   * by adding additional information from the dependents to the relation
   * of the governor.
   * If called with a tree of dependencies, with CCprocess set to false and
   * without extras, then the tree structure is preserved (as stated by the
   * original documentation of this method).
   * <p>
   * The following is processed, in the order of the calls in the method body:
   * <ul>
   * <li>nominal modifier dependencies: nmod
   * <br>
   * If there exist the relations case(hat, in) and nmod(cat, hat) then
   * the nmod relation is enhanced to nmod:in(cat, hat).
   * The case(hat, in) relation is preserved.</li>
   * <li>clausal modifier of noun/adverbial clause modifier with case markers: acl/advcl
   * <br>
   * If there exist the relations case(attacking, of) and advcl(heard, attacking) then
   * the advcl relation is enhanced to advcl:of(heard, attacking).
   * The case(attacking, of) relation is preserved.</li>
   * <li>conjunct dependencies
   * <br>
   * If there exist the relations
   * cc(investors, and) and
   * conj(investors, regulators), then the conj relation is
   * enhanced to
   * conj:and(investors, regulators)</li>
   * <li>For relative clauses, it will collapse referent (only if
   * {@code includeExtras.doRef} and {@code includeExtras.collapseRef} are set)
   * <br>
   * ref(man, that) and dobj(love, that) are collapsed
   * to dobj(love, man)</li>
   * </ul>
   * Afterwards, {@code treatCC} runs if {@code CCprocess} is set, and extra
   * subjects are added and corrected if {@code includeExtras.doSubj} is set.
   * The list is finally replaced by the sorted dependencies of the graph.
   *
   * @param list dependencies to collapse; modified in place
   * @param CCprocess whether to run {@code treatCC} on the graph
   * @param includeExtras selects the optional steps through its
   *          {@code doRef}, {@code collapseRef} and {@code doSubj} flags
   */
  @Override
  protected void collapseDependencies(List list, boolean CCprocess, Extras includeExtras) {
    SemanticGraph sg = new SemanticGraph(list);

    if (DEBUG) {
      printListSorted("collapseDependencies: CCproc: " + CCprocess + " includeExtras: " + includeExtras, sg.typedDependencies());
    }


    correctDependencies(sg);
    if (DEBUG) {
      printListSorted("After correctDependencies:", sg.typedDependencies());
    }

    processMultiwordPreps(sg);
    if (DEBUG) {
      printListSorted("After processMultiwordPreps:", sg.typedDependencies());
    }


    /* Coordinated PPs and prepositions are expanded before the relations are renamed below. */
    expandPPConjunctions(sg);
    if (DEBUG) {
      printListSorted("After expandPPConjunctions:", sg.typedDependencies());
    }

    expandPrepConjunctions(sg);
    if (DEBUG) {
      printListSorted("After expandPrepConjunctions:", sg.typedDependencies());
    }

    addCaseMarkerInformation(sg);
    if (DEBUG) {
      printListSorted("After addCaseMarkerInformation:", sg.typedDependencies());
    }

    addConjInformation(sg);
    if (DEBUG) {
      printListSorted("After addConjInformation:", sg.typedDependencies());
    }

    /* Optional step: referents of relative clauses. */
    if (includeExtras.doRef) {
      addRef(sg);
      if (DEBUG) {
        printListSorted("After adding ref:", sg.typedDependencies());
      }

      if (includeExtras.collapseRef) {
        collapseReferent(sg);
        if (DEBUG) {
          printListSorted("After collapse referent:",  sg.typedDependencies());
        }
      }
    }

    /* Optional step: conjunction processing. */
    if (CCprocess) {
      treatCC(sg);
      if (DEBUG) {
        printListSorted("After treatCC:", sg.typedDependencies());
      }
    }

    /* Optional step: extra subjects, followed by another correctSubjPass. */
    if (includeExtras.doSubj) {
      addExtraNSubj(sg);

      if (DEBUG) {
        printListSorted("After adding extra nsubj:", sg.typedDependencies());
      }
      correctSubjPass(sg);

      if (DEBUG) {
        printListSorted("After correctSubjPass:", sg.typedDependencies());
      }
    }

    list.clear();
    list.addAll(sg.typedDependencies());

    Collections.sort(list);
    if (DEBUG) {
      printListSorted("After all collapse:", list);
    }
  }

  /**
   * Collapses the dependencies without conjunction processing and without
   * extras, i.e. calls {@code collapseDependencies(list, false, Extras.NONE)}.
   *
   * @param list dependencies to collapse; modified in place
   */
  @Override
  protected void collapseDependenciesTree(List list) {
    collapseDependencies(list, false, Extras.NONE);
  }

  /**
   * Does some hard coding to deal with relation in CONJP. The marker is
   * lower-cased (with {@code Locale.ROOT}) and then normalized as follows:
   * <ul>
   * <li>"not" preceded by "but", "but rather", "if not", "instead of" and
   * "rather than" become negcc;</li>
   * <li>"but also", "as well" and "not to mention" become and;</li>
   * <li>every other marker is used as is, in lower case. This also holds
   * when no token follows the marker.</li>
   * </ul>
   * NOTE(review): the original documentation also lists "&amp;" as mapped to
   * "and"; this method passes it on unchanged, so that mapping would have
   * to happen in {@code getConj} -- confirm.
   *
   * @param cc The head dependency of the conjunction marker
   * @param sg The complete current semantic graph
   * @return A GrammaticalRelation made from a normalized form of that
   *         conjunction.
   */
  private static GrammaticalRelation conjValue(IndexedWord cc, SemanticGraph sg) {

    int pos = cc.index();
    String newConj = cc.value().toLowerCase(Locale.ROOT);

    if (newConj.equals("not")) {
      IndexedWord prevWord = sg.getNodeByIndexSafe(pos - 1);
      if (prevWord != null && prevWord.value().toLowerCase(Locale.ROOT).equals("but")) {
        return UniversalEnglishGrammaticalRelations.getConj("negcc");
      }
    }

    IndexedWord secondIWord = sg.getNodeByIndexSafe(pos + 1);

    if (secondIWord == null) {
      /* No following token to combine with. Use the lower-cased marker so
       * that the relation name is the same as on every other path.
       */
      return UniversalEnglishGrammaticalRelations.getConj(newConj);
    }
    String secondWord = secondIWord.value().toLowerCase(Locale.ROOT);
    if (newConj.equals("but")) {
      if (secondWord.equals("rather")) {
        newConj = "negcc";
      } else if (secondWord.equals("also")) {
        newConj = "and";
      }
    } else if (newConj.equals("if") && secondWord.equals("not")) {
      newConj = "negcc";
    } else if (newConj.equals("instead") && secondWord.equals("of")) {
      newConj = "negcc";
    } else if (newConj.equals("rather") && secondWord.equals("than")) {
      newConj = "negcc";
    } else if (newConj.equals("as") && secondWord.equals("well")) {
      newConj = "and";
    } else if (newConj.equals("not") && secondWord.equals("to")) {
      /* "not to mention" needs a look at the third token. */
      IndexedWord thirdIWord = sg.getNodeByIndexSafe(pos + 2);
      String thirdWord = thirdIWord != null ? thirdIWord.value().toLowerCase(Locale.ROOT) : null;
      if (thirdWord != null && thirdWord.equals("mention")) {
        newConj = "and";
      }
    }
    return UniversalEnglishGrammaticalRelations.getConj(newConj);
  }


  /**
   * Propagates dependencies across conjuncts.
   * <p>
   * For every edge {@code conj(gov, dep)} found in a soft copy of the graph:
   * <ul>
   * <li>each edge that has {@code gov} as its dependent is duplicated as an
   *     extra edge pointing at {@code dep} (root and case-marker relations
   *     are never copied; when both conjuncts govern a relative clause,
   *     direct objects and nominal subjects are not copied either);</li>
   * <li>if {@code gov} governs a subject, {@code dep} is tagged VB* or JJ*
   *     and has no subject of its own, the subject of {@code gov} is added
   *     to {@code dep}, switching between active and passive subject
   *     relations where the tag or a passive auxiliary calls for it.</li>
   * </ul>
   * Conjuncts without a POS tag are skipped for subject propagation.
   *
   * @param sg SemanticGraph to operate on; new edges are added in place
   */
  private static void treatCC(SemanticGraph sg) {

    // Map from each node to the set of edges in which the node appears
    // as dependent.
    // NB: Here and in other places below, we use a TreeSet (which extends
    // SortedSet) to guarantee that results are deterministic.
    Map<IndexedWord, Set<SemanticGraphEdge>> map = Generics.newHashMap();
    // Map from each governor of a subject relation to the first such
    // relation encountered.
    Map<IndexedWord, SemanticGraphEdge> subjectMap = Generics.newHashMap();
    // Nodes that govern a passive auxiliary.
    Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
    // Governors of relative clause modifiers; only membership is tested.
    Set<IndexedWord> rcmodHeads = Generics.newHashSet();

    for (SemanticGraphEdge edge : sg.edgeIterable()) {
      GrammaticalRelation edgeReln = edge.getRelation();

      map.computeIfAbsent(edge.getDependent(), k -> new TreeSet<>()).add(edge);

      if (edgeReln.equals(AUX_PASSIVE_MODIFIER)) {
        withPassiveAuxiliary.add(edge.getGovernor());
      }

      // look for subjects
      GrammaticalRelation parentReln = edgeReln.getParent();
      if (parentReln == NOMINAL_SUBJECT
          || parentReln == SUBJECT
          || parentReln == CLAUSAL_SUBJECT) {
        subjectMap.putIfAbsent(edge.getGovernor(), edge);
      }

      // look for rcmod relations
      if (edgeReln == RELATIVE_CLAUSE_MODIFIER) {
        rcmodHeads.add(edge.getGovernor());
      }
    }

    // Iterate over a copy so that edges can be added to sg while looping.
    SemanticGraph sgCopy = sg.makeSoftCopy();

    // find typed deps of form conj(gov,dep)
    for (SemanticGraphEdge edge : sgCopy.edgeIterable()) {
      if (!UniversalEnglishGrammaticalRelations.getConjs().contains(edge.getRelation())) {
        continue;
      }
      IndexedWord gov = edge.getGovernor();
      IndexedWord dep = edge.getDependent();

      // look at the dep in the conjunct
      Set<SemanticGraphEdge> gov_relations = map.get(gov);
      if (gov_relations != null) {
        for (SemanticGraphEdge edge1 : gov_relations) {
          IndexedWord newGov = edge1.getGovernor();
          // in the case of errors in the basic dependencies, it
          // is possible to have overlapping newGov & dep
          if (newGov.equals(dep)) {
            continue;
          }

          GrammaticalRelation newRel = edge1.getRelation();
          //TODO: Do we want to copy case markers here?
          if (newRel != ROOT && newRel != CASE_MARKER) {
            if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
              // to prevent wrong propagation in the case of long dependencies in relative clauses
              if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
                if (DEBUG) {
                  System.err.println("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
                }
                sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
              }
            } else {
              if (DEBUG) {
                System.err.println("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
              }
              sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
            }
          }
        }
      }

      // propagate subjects
      // look at the gov in the conjunct: if it is has a subject relation,
      // the dep is a verb and the dep doesn't have a subject relation
      // then we want to add a subject relation for the dep.
      // (By testing for the dep to be a verb, we are going to miss subject of
      // copular verbs! but
      // is it safe to relax this assumption?? i.e., just test for the subject
      // part)
      // CDM 2008: I also added in JJ, since participial verbs are often
      // tagged JJ
      String tag = dep.tag();
      // An untagged conjunct cannot be classified as verbal; skip it rather
      // than fail with a NullPointerException.
      if (tag != null && subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && ! subjectMap.containsKey(dep)) {
        SemanticGraphEdge tdsubj = subjectMap.get(gov);
        // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
        // add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
        GrammaticalRelation relation = tdsubj.getRelation();
        if (relation == NOMINAL_PASSIVE_SUBJECT) {
          if (isDefinitelyActive(tag)) {
            relation = NOMINAL_SUBJECT;
          }
        } else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
          if (isDefinitelyActive(tag)) {
            relation = CLAUSAL_SUBJECT;
          }
        } else if (relation == NOMINAL_SUBJECT) {
          if (withPassiveAuxiliary.contains(dep)) {
            relation = NOMINAL_PASSIVE_SUBJECT;
          }
        } else if (relation == CLAUSAL_SUBJECT) {
          if (withPassiveAuxiliary.contains(dep)) {
            relation = CLAUSAL_PASSIVE_SUBJECT;
          }
        }
        if (DEBUG) {
          System.err.println("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.getDependent() + " (subj propagation case)");
        }
        sg.addEdge(dep, tdsubj.getDependent(), relation, Double.NEGATIVE_INFINITY, true);
      }

      // Objects are deliberately NOT propagated.
      // cdm july 2010: An earlier version copied a dobj from the first
      // clause to a later conjoined clause if it didn't contain its own
      // dobj or prepc. But this is too aggressive and wrong if the later
      // clause is intransitive (including passivized cases), and there is
      // no good "sometimes" heuristic. If this were ever reinstated, it
      // should also not add an object when there is a ccomp (SBAR).
    }
  }

  /**
   * Tells whether a POS tag marks a form that is certainly not a passive
   * participle: exactly VB, VBZ or VBP, or any tag starting with JJ.
   * VBD is intentionally left out because it is often a tagging mistake.
   *
   * @param tag POS tag of the word (must not be null)
   * @return true if the tag is one of the forms listed above
   */
  private static boolean isDefinitelyActive(String tag) {
    switch (tag) {
      case "VB":
      case "VBZ":
      case "VBP":
        return true;
      default:
        return tag.startsWith("JJ");
    }
  }


  /**
   * This method will collapse a referent relation such as follows. e.g.:
   * "The man that I love ... " ref(man, that) dobj(love, that) -&gt;
   * ref(man, that) dobj(love, man)
   * <p>
   * For every ref edge, each other incoming edge of the relative word is
   * removed and re-created as an extra edge pointing at the antecedent.
   *
   * @param sg SemanticGraph to operate on; modified in place
   */
  private static void collapseReferent(SemanticGraph sg) {
    // find typed deps of form ref(gov, dep)
    // put them in a List for processing
    List<SemanticGraphEdge> refs = new ArrayList<>(sg.findAllRelns(REFERENT));

    // Incoming edges are read from a copy so that sg can be modified
    // while they are being iterated.
    SemanticGraph sgCopy = sg.makeSoftCopy();

    // now substitute target of referent where possible
    for (SemanticGraphEdge ref : refs) {
      IndexedWord dep = ref.getDependent();// take the relative word
      IndexedWord ant = ref.getGovernor();// take the antecedent

      for (Iterator<SemanticGraphEdge> iter = sgCopy.incomingEdgeIterator(dep); iter.hasNext(); ) {
        SemanticGraphEdge edge = iter.next();

        // the last condition below maybe shouldn't be necessary, but it has
        // helped stop things going haywire a couple of times (it stops the
        // creation of a unit cycle that probably leaves something else
        // disconnected) [cdm Jan 2010]
        if (edge.getRelation() != REFERENT && ! edge.getGovernor().equals(ant)) {
          sg.removeEdge(edge);
          sg.addEdge(edge.getGovernor(), ant, edge.getRelation(), Double.NEGATIVE_INFINITY, true);
        }
      }
    }
  }

  /**
   * Adds ref edges for relative clauses. For each acl:relcl edge, the
   * children and grandchildren of the clause head are searched for words
   * matching {@code EnglishPatterns.RELATIVIZING_WORD_PATTERN}
   * (that/what/which/etc.). The leftmost such word, preferring a child
   * over a grandchild when their indices tie, becomes the dependent of a
   * new extra ref edge from the governor of the relative clause, unless
   * the two nodes are already connected by an edge.
   *
   * @param sg SemanticGraph to operate on; modified in place
   */
  private static void addRef(SemanticGraph sg) {
    for (SemanticGraphEdge relclEdge : sg.findAllRelns(RELATIVE_CLAUSE_MODIFIER)) {
      IndexedWord antecedent = relclEdge.getGovernor();
      IndexedWord clauseHead = relclEdge.getDependent();

      // Leftmost relativizing word among the children and, separately,
      // among the grandchildren of the clause head.
      IndexedWord bestChild = null;
      IndexedWord bestGrandchild = null;
      for (SemanticGraphEdge childEdge : sg.outgoingEdgeIterable(clauseHead)) {
        IndexedWord child = childEdge.getDependent();
        boolean childMatches = EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(child.value()).matches();
        if (childMatches && (bestChild == null || child.index() < bestChild.index())) {
          bestChild = child;
        }

        for (SemanticGraphEdge grandchildEdge : sg.outgoingEdgeIterable(child)) {
          IndexedWord grandchild = grandchildEdge.getDependent();
          boolean grandchildMatches = EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(grandchild.value()).matches();
          if (grandchildMatches && (bestGrandchild == null || grandchild.index() < bestGrandchild.index())) {
            bestGrandchild = grandchild;
          }
        }
      }

      // A grandchild wins only when it is strictly to the left of the best child.
      boolean useGrandchild = bestGrandchild != null
          && (bestChild == null || bestGrandchild.index() < bestChild.index());
      IndexedWord relativizer = useGrandchild ? bestGrandchild : bestChild;

      if (relativizer == null || sg.containsEdge(antecedent, relativizer)) {
        continue;
      }
      sg.addEdge(antecedent, relativizer, REFERENT, Double.NEGATIVE_INFINITY, true);
    }
  }

  /**
   * Add extra nsubj dependencies when collapsing basic dependencies.
   * <p>
   * In the general case, we look for an aux modifier under an xcomp
   * modifier, and assuming there aren't already associated nsubj
   * dependencies as daughters of the original xcomp dependency, we
   * add nsubj dependencies for each nsubj daughter of the aux.
   * <p>
   * There is also a special case for "to" words, in which case we add
   * a dependency if and only if there is no nsubj associated with the
   * xcomp and there is no other aux dependency.  This accounts for
   * sentences such as "he decided not to" with no following verb.
   *
   * @param sg SemanticGraph to operate on; extra nsubj edges are added in place
   */
  private static void addExtraNSubj(SemanticGraph sg) {

    for (SemanticGraphEdge xcomp : sg.findAllRelns(XCLAUSAL_COMPLEMENT)) {
      IndexedWord modifier = xcomp.getDependent();
      IndexedWord head = xcomp.getGovernor();

      boolean hasSubjectDaughter = false;
      boolean hasAux = false;
      List<IndexedWord> subjects = Generics.newArrayList();
      List<IndexedWord> objects = Generics.newArrayList();
      for (SemanticGraphEdge dep : sg.edgeIterable()) {
        GrammaticalRelation reln = dep.getRelation();
        IndexedWord depGov = dep.getGovernor();
        boolean isSubject = reln == NOMINAL_SUBJECT || reln == NOMINAL_PASSIVE_SUBJECT;

        // already have a subject dependency
        if (isSubject && depGov.equals(modifier)) {
          hasSubjectDaughter = true;
          break;
        }

        // note that a marker on the modifier also counts as an "aux" here
        if ((reln == AUX_MODIFIER || reln == MARKER) && depGov.equals(modifier)) {
          hasAux = true;
        }

        if (isSubject && depGov.equals(head)) {
          subjects.add(dep.getDependent());
        }

        if (reln == DIRECT_OBJECT && depGov.equals(head)) {
          objects.add(dep.getDependent());
        }
      }

      // if we already have an nsubj dependency, no need to add an extra nsubj
      if (hasSubjectDaughter) {
        continue;
      }

      // Proceed only for a bare "to" (no aux/mark) or for a non-"to"
      // modifier that does have an aux/mark.
      boolean modifierIsTo = modifier.value().equalsIgnoreCase("to");
      if (modifierIsTo == hasAux) {
        continue;
      }

      // In general, we find that the objects of the verb are better
      // for extra nsubj than the original nsubj of the verb.  For example,
      // "Many investors wrote asking the SEC to require ..."
      // There is no nsubj of asking, but the dobj, SEC, is the extra nsubj of require.
      // Similarly, "The law tells them when to do so"
      // Instead of nsubj(do, law) we want nsubj(do, them)
      List<IndexedWord> controllers = objects.isEmpty() ? subjects : objects;
      for (IndexedWord controller : controllers) {
        if ( ! sg.containsEdge(modifier, controller)) {
          sg.addEdge(modifier, controller, NOMINAL_SUBJECT, Double.NEGATIVE_INFINITY, true);
        }
      }
    }
  }

  /** Matches a governor that has an auxpass child and an nsubj or csubj child. */
  private static final SemgrexPattern CORRECT_SUBJPASS_PATTERN = SemgrexPattern.compile("{}=gov >auxpass {} >/^(nsubj|csubj)$/ {}=subj");

  /**
   * This method corrects subjects of verbs for which we identified an auxpass,
   * but didn't identify the subject as passive: the nsubj/csubj edge between
   * the matched governor and subject is replaced by the corresponding
   * passive relation.
   *
   * @param sg SemanticGraph to work on
   */
  private static void correctSubjPass(SemanticGraph sg) {

    /* If the graph doesn't have a root (most likely because of
     * a parsing error), we can't match Semgrexes, so do
     * nothing. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    // Match against a copy so that sg can be modified while matching.
    SemanticGraph sgCopy = sg.makeSoftCopy();
    SemgrexMatcher matcher = CORRECT_SUBJPASS_PATTERN.matcher(sgCopy);

    while (matcher.find()) {
      IndexedWord gov = matcher.getNode("gov");
      IndexedWord subj = matcher.getNode("subj");
      SemanticGraphEdge edge = sg.getEdge(gov, subj);

      GrammaticalRelation passiveReln;
      if (edge.getRelation() == NOMINAL_SUBJECT) {
        passiveReln = NOMINAL_PASSIVE_SUBJECT;
      } else if (edge.getRelation() == CLAUSAL_SUBJECT) {
        passiveReln = CLAUSAL_PASSIVE_SUBJECT;
      } else {
        // Any other relation is left untouched.
        continue;
      }

      sg.removeEdge(edge);
      sg.addEdge(gov, subj, passiveReln, Double.NEGATIVE_INFINITY, false);
    }
  }

  /* These multi-word prepositions typically have a
   *   case/advmod(gov, w1)
   *   case(gov, w2)
   * structure in the basic representation.
   *
   * Entries are lower-cased "w1_w2" keys and must not contain any
   * whitespace, otherwise they can never equal a bigram built from the
   * sentence. Kept in roughly alphabetical order; the array order is also
   * the order in which the prepositions are processed.
   */
  private static final String[] TWO_WORD_PREPS_REGULAR = {"across_from", "along_with", "alongside_of", "apart_from", "as_for", "as_from", "as_of", "as_per", "as_to", "aside_from", "based_on", "close_by", "close_to", "contrary_to", "compared_to", "compared_with", "depending_on", "except_for", "exclusive_of", "far_from", "followed_by", "inside_of", "irrespective_of", "next_to", "near_to", "off_of", "out_of", "outside_of", "owing_to", "preliminary_to", "preparatory_to", "previous_to", "prior_to", "pursuant_to", "regardless_of", "subsequent_to", "thanks_to", "together_with"};

  /* These multi-word prepositions can have a
   *   advmod(gov1, w1)
   *   nmod(w1, gov2)
   *   case(gov2, w2)
   * structure in the basic representation.
   *
   * Kept in roughly alphabetical order; the array order is also the
   * order in which the prepositions are processed.
   */
  private static final String[] TWO_WORD_PREPS_COMPLEX = {"apart_from", "as_from", "aside_from", "away_from", "close_by", "close_to", "contrary_to", "far_from", "next_to", "near_to", "out_of", "outside_of", "pursuant_to", "regardless_of", "together_with"};

  /*
   * Multi-word prepositions with the structure
   *   case(w2, w1)
   *   nmod(gov, w2)
   *   case(gov2, w3)
   *   nmod(w2, gov2)
   * in the basic representations.
   */
  private static final String[] THREE_WORD_PREPS = { "by_means_of", "in_accordance_with", "in_addition_to", "in_case_of", "in_front_of", "in_lieu_of", "in_place_of", "in_spite_of", "on_account_of", "on_behalf_of", "on_top_of", "with_regard_to", "with_respect_to" };


  /* Semgrex patterns locating the graph configurations described above. */
  private static final SemgrexPattern TWO_WORD_PREPS_REGULAR_PATTERN = SemgrexPattern.compile("{}=gov >/(case|advmod)/ ({}=w1 !> {}) >case ({}=w2 !== {}=w1 !> {})");
  private static final SemgrexPattern TWO_WORD_PREPS_COMPLEX_PATTERN = SemgrexPattern.compile("({}=w1 >nmod ({}=gov2 >case ({}=w2 !> {}))) [ == {$} | < {}=gov ]");
  private static final SemgrexPattern THREE_WORD_PREPS_PATTERN = SemgrexPattern.compile("({}=w2 >/(nmod|acl|advcl)/ ({}=gov2 >/(case|mark)/ ({}=w3 !> {}))) >case ({}=w1 !> {}) [ < {}=gov | == {$} ]");


  /**
   * Process multi-word prepositions.
   * <p>
   * Collects every bigram and trigram of adjacent words (in sorted vertex
   * order), keyed as lower-cased values joined by "_" and mapped to the
   * index of the first word of each occurrence, then hands them to the
   * two-word and three-word preposition processors.
   *
   * @param sg SemanticGraph to operate on; modified in place
   */
  private static void processMultiwordPreps(SemanticGraph sg) {

    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty()) {
      return;
    }

    HashMap<String, HashSet<Integer>> bigrams = new HashMap<>();
    HashMap<String, HashSet<Integer>> trigrams = new HashMap<>();

    List<IndexedWord> vertexList = sg.vertexListSorted();
    int numWords = vertexList.size();

    for (int i = 1; i < numWords; i++) {
      IndexedWord first = vertexList.get(i - 1);
      // Locale.ROOT keeps the keys comparable with the ASCII preposition
      // lists regardless of the JVM default locale (e.g. Turkish "I").
      String bigram = first.value().toLowerCase(Locale.ROOT) + "_" + vertexList.get(i).value().toLowerCase(Locale.ROOT);
      bigrams.computeIfAbsent(bigram, k -> new HashSet<>()).add(first.index());

      if (i > 1) {
        IndexedWord beforeFirst = vertexList.get(i - 2);
        String trigram = beforeFirst.value().toLowerCase(Locale.ROOT) + "_" + bigram;
        trigrams.computeIfAbsent(trigram, k -> new HashSet<>()).add(beforeFirst.index());
      }
    }

    /* Simple two-word prepositions. */
    processSimple2WP(sg, bigrams);

    /* More complex two-word prepositions in which the first
     * preposition is the head of the prepositional phrase. */
    processComplex2WP(sg, bigrams);

    /* Process three-word prepositions. */
    process3WP(sg, trigrams);

  }


  /**
   * Processes all the two-word prepositions in TWO_WORD_PREPS_REGULAR.
   */
  private static void processSimple2WP(SemanticGraph sg, HashMap> bigrams) {
    for (String bigram : TWO_WORD_PREPS_REGULAR) {
      if (bigrams.get(bigram) == null) {
        continue;
      }

      for (Integer i : bigrams.get(bigram)) {
        IndexedWord w1 = sg.getNodeByIndexSafe(i);
        IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);

        if (w1 == null || w2 == null) {
          continue;
        }

        SemgrexMatcher matcher = TWO_WORD_PREPS_REGULAR_PATTERN.matcher(sg);
        IndexedWord gov = null;
        while (matcher.find()) {
          if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2"))) {
            gov = matcher.getNode("gov");
            break;
          }
        }

        if (gov == null) {
          continue;
        }

        createMultiWordExpression(sg, gov, CASE_MARKER, w1, w2);
      }
    }
  }


  /**
   * Processes all the two-word prepositions in TWO_WORD_PREPS_COMPLEX.
   */
  private static void processComplex2WP(SemanticGraph sg, HashMap> bigrams) {
    for (String bigram : TWO_WORD_PREPS_COMPLEX) {
      if (bigrams.get(bigram) == null) {
        continue;
      }

      for (Integer i : bigrams.get(bigram)) {
        IndexedWord w1 = sg.getNodeByIndexSafe(i);
        IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);

        if (w1 == null || w2 == null) {
          continue;
        }

        SemgrexMatcher matcher = TWO_WORD_PREPS_COMPLEX_PATTERN.matcher(sg);
        IndexedWord gov = null;
        IndexedWord gov2 = null;
        while (matcher.find()) {
          if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2"))) {
            gov = matcher.getNode("gov");
            gov2 = matcher.getNode("gov2");
            break;
          }
        }

        if (gov2 == null) {
          continue;
        }

        /* Attach the head of the prepositional phrase to
         * the head of w1. */
        if (sg.getRoots().contains(w1)) {
          SemanticGraphEdge edge = sg.getEdge(w1, gov2);
          if (edge == null) {
            continue;
          }

          sg.removeEdge(edge);
          sg.getRoots().remove(w1);
          sg.addRoot(gov2);
        } else {
          SemanticGraphEdge edge = sg.getEdge(w1, gov2);
          if (edge == null) {
            continue;
          }
          sg.removeEdge(edge);

          gov = gov == null ? sg.getParent(w1) : gov;
          if (gov == null) {
            continue;
          }

          /* Determine the relation to use. If it is a relation that can
           * join two clauses and w1 is the head of a copular construction, then
           * use the relation of w1 and its parent. Otherwise use the relation of edge. */
          GrammaticalRelation reln = edge.getRelation();
          if (sg.hasChildWithReln(w1, COPULA)) {
            GrammaticalRelation reln2 = sg.getEdge(gov, w1).getRelation();
            if (clauseRelations.contains(reln2)) {
              reln = reln2;
            }
          }
         sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
        }

        /* Make children of w1 dependents of gov2. */
        for (SemanticGraphEdge edge2 : sg.getOutEdgesSorted(w1)) {
          sg.removeEdge(edge2);
          sg.addEdge(gov2, edge2.getDependent(), edge2.getRelation(), edge2.getWeight(), edge2.isExtra());
        }

        createMultiWordExpression(sg, gov2, CASE_MARKER, w1, w2);
      }
    }
  }


  /**
   * Processes all the three-word prepositions in THREE_WORD_PREPS.
   */
  private static void process3WP(SemanticGraph sg, HashMap> trigrams) {

    for (String trigram : THREE_WORD_PREPS) {
      if (trigrams.get(trigram) == null) {
        continue;
      }

      for (Integer i : trigrams.get(trigram)) {
        IndexedWord w1 = sg.getNodeByIndexSafe(i);
        IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);
        IndexedWord w3 = sg.getNodeByIndexSafe(i + 2);

        if (w1 == null || w2 == null || w3 == null) {
          continue;
        }

        SemgrexMatcher matcher = THREE_WORD_PREPS_PATTERN.matcher(sg);
        IndexedWord gov = null;
        IndexedWord gov2 = null;
        while (matcher.find()) {
          if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2")) && w3.equals(matcher.getNode("w3"))) {
            gov = matcher.getNode("gov");
            gov2 = matcher.getNode("gov2");
            break;
          }
        }

        if (gov2 == null) {
          continue;
        }

        GrammaticalRelation markerReln = CASE_MARKER;

        if (sg.getRoots().contains(w2)) {
          SemanticGraphEdge edge = sg.getEdge(w2, gov2);
          if (edge == null) {
            continue;
          }

          sg.removeEdge(edge);
          sg.getRoots().remove(w2);
          sg.addRoot(gov2);
        } else {
          SemanticGraphEdge edge = sg.getEdge(w2, gov2);
          if (edge == null) {
            continue;
          }
          sg.removeEdge(edge);

          gov = gov == null ? sg.getParent(w2) : gov;
          if (gov == null) {
            continue;
          }

          GrammaticalRelation reln = sg.getEdge(gov, w2).getRelation();
          if (reln == NOMINAL_MODIFIER
              && (edge.getRelation() == CLAUSAL_MODIFIER ||
                  edge.getRelation() == ADV_CLAUSE_MODIFIER)) {
            reln = edge.getRelation();
            markerReln = MARKER;
          }
          sg.addEdge(gov, gov2, reln, Double.NEGATIVE_INFINITY, false);
        }

        /* Make children of w2 dependents of gov2. */
        for (SemanticGraphEdge edge2 : sg.getOutEdgesSorted(w2)) {
          sg.removeEdge(edge2);
          sg.addEdge(gov2, edge2.getDependent(), edge2.getRelation(), edge2.getWeight(), edge2.isExtra());
        }

        createMultiWordExpression(sg, gov2, markerReln, w1, w2, w3);
      }
    }
  }

  /**
   * Re-attaches the given words as a multi-word expression. Each word is
   * first detached from the node returned by {@code sg.getParent(word)}
   * (if any); the first word is then attached to {@code gov} with relation
   * {@code reln}, and every following word is attached to the first word
   * with the MULTI_WORD_EXPRESSION relation.
   * <p>
   * Nothing happens if the graph has no root, {@code gov} is null, or no
   * words are given.
   *
   * @param sg SemanticGraph to operate on; modified in place
   * @param gov governor of the whole expression
   * @param reln relation between {@code gov} and the first word
   * @param words the words of the expression, in order
   */
  private static void createMultiWordExpression(SemanticGraph sg, IndexedWord gov, GrammaticalRelation reln, IndexedWord... words) {
    if (sg.getRoots().isEmpty() || gov == null || words.length < 1) {
      return;
    }

    IndexedWord mweHead = words[0];
    for (int idx = 0; idx < words.length; idx++) {
      IndexedWord current = words[idx];

      // Detach the word from its current parent before re-attaching it.
      IndexedWord oldParent = sg.getParent(current);
      SemanticGraphEdge oldEdge = oldParent == null ? null : sg.getEdge(oldParent, current);
      if (oldEdge != null) {
        sg.removeEdge(oldEdge);
      }

      if (idx == 0) {
        sg.addEdge(gov, current, reln, Double.NEGATIVE_INFINITY, false);
      } else {
        sg.addEdge(mweHead, current, MULTI_WORD_EXPRESSION, Double.NEGATIVE_INFINITY, false);
      }
    }
  }

  /**
   * Removes duplicate edges from the graph by delegating to
   * {@code SemanticGraph.deleteDuplicateEdges()}.
   * <p>
   * Duplicates can arise, for example, when the method that "corrects"
   * nsubj dependencies turns one into an nsubjpass dependency while
   * another source has already produced the same nsubjpass dependency.
   *
   * @param sg SemanticGraph to clean up; modified in place
   */
  private static void removeExactDuplicates(SemanticGraph sg) {
    sg.deleteDuplicateEdges();
  }


  /**
   * Reads grammatical structures from a CoNLL-X file, using the Universal
   * English relation names and {@link FromDependenciesFactory} to build
   * each structure.
   *
   * @param fileName path of the CoNLL-X file to read
   * @return the grammatical structures read from the file
   * @throws IOException if the underlying reader fails
   */
  public static List<GrammaticalStructure> readCoNLLXGrammaticalStructureCollection(String fileName) throws IOException {
    return readCoNLLXGrammaticalStructureCollection(fileName, UniversalEnglishGrammaticalRelations.shortNameToGRel, new FromDependenciesFactory());
  }

  public static UniversalEnglishGrammaticalStructure buildCoNLLXGrammaticalStructure(List> tokenFields) {
    return (UniversalEnglishGrammaticalStructure) buildCoNLLXGrammaticalStructure(tokenFields, UniversalEnglishGrammaticalRelations.shortNameToGRel, new FromDependenciesFactory());
  }

  /**
   * Factory that creates a {@link UniversalEnglishGrammaticalStructure}
   * from a list of typed dependencies and a root node.
   */
  public static class FromDependenciesFactory
    implements GrammaticalStructureFromDependenciesFactory {
    /**
     * @param tdeps the typed dependencies of the sentence
     * @param root the root node of the structure
     * @return a new structure built from the given dependencies
     */
    @Override
    public UniversalEnglishGrammaticalStructure build(List<TypedDependency> tdeps, TreeGraphNode root) {
      return new UniversalEnglishGrammaticalStructure(tdeps, root);
    }
  }

} // end class UniversalEnglishGrammaticalStructure

    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api