edu.stanford.nlp.trees.EnglishGrammaticalStructure Maven / Gradle / Ivy

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

package edu.stanford.nlp.trees;

import java.io.*;
import java.util.*;
import java.util.function.Predicate;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.process.Morphology;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.logging.Redwood;

import static edu.stanford.nlp.trees.EnglishGrammaticalRelations.*;
import static edu.stanford.nlp.trees.GrammaticalRelation.*;

/**
 * A GrammaticalStructure for English. This is the class that produces Stanford Dependencies.
 *
 * For feeding Stanford parser trees into this class, the Stanford parser should be run with the
 * "-retainNPTmpSubcategories" option for best results!
 *
 * @author Bill MacCartney
 * @author Marie-Catherine de Marneffe
 * @author Christopher Manning
 * @author Daniel Cer (CoNLLX format and alternative user selected dependency
 *         printer/reader interface)
 * @author John Bauer
 */
public class EnglishGrammaticalStructure extends GrammaticalStructure  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(EnglishGrammaticalStructure.class);

  private static final long serialVersionUID = -1866362375001969402L;

  private static final boolean DEBUG = System.getProperty("EnglishGrammaticalStructure", null) != null;
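  // Illustrative note (not from the original source): only the presence of the property is
  // checked, so debug tracing can be enabled from the command line with, for example,
  //   java -DEnglishGrammaticalStructure=true <main class> <args>
  // Any value, including the empty string, turns on the DEBUG printouts used throughout this class.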

  /**
   * Construct a new {@code EnglishGrammaticalStructure} from an existing parse
   * tree. The new {@code GrammaticalStructure} has the same tree structure
   * and label values as the given tree (but no shared storage). As part of
   * construction, the parse tree is analyzed using definitions from
   * {@link GrammaticalRelation {@code GrammaticalRelation}} to populate
   * the new {@code GrammaticalStructure} with as many labeled grammatical
   * relations as it can.
   *
   * @param t Parse tree to make grammatical structure from
   */
  public EnglishGrammaticalStructure(Tree t) {
    this(t, new PennTreebankLanguagePack().punctuationWordRejectFilter());
  }
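
  // Usage sketch (illustrative only; demoCollapsedDependencies is a hypothetical helper and not
  // part of this class). Given a parse tree from the Stanford parser, the constructor above plus
  // the typedDependencies()/typedDependenciesCollapsed() methods inherited from
  // GrammaticalStructure are the usual way to obtain Stanford Dependencies.
  private static Collection<TypedDependency> demoCollapsedDependencies(Tree tree) {
    // Default punctuation filter and semantic head finder, as in the one-argument constructor.
    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
    // Collapsed dependencies (prep_*, conj_*, etc.), produced via collapseDependencies() below.
    return gs.typedDependenciesCollapsed();
  }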

  /**
   * This gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
   *
   * @param t Parse tree to make grammatical structure from
   * @param puncFilter Filter to remove punctuation dependencies
   */
  public EnglishGrammaticalStructure(Tree t, Predicate<String> puncFilter) {
    this(t, puncFilter, new SemanticHeadFinder(true));
  }

  /**
   * Construct a new {@code GrammaticalStructure} from an existing parse
   * tree. The new {@code GrammaticalStructure} has the same tree structure
   * and label values as the given tree (but no shared storage). As part of
   * construction, the parse tree is analyzed using definitions from
   * {@link GrammaticalRelation {@code GrammaticalRelation}} to populate
   * the new {@code GrammaticalStructure} with as many labeled grammatical
   * relations as it can.
   *
   * Once upon a time this method had an extra parameter as to whether to operate
   * in a threadsafe manner. We decided that that was a really bad idea, and this
   * method now always acts in a threadsafe manner.
   * This method gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
   *
   * @param t Parse tree to make grammatical structure from
   * @param puncFilter Filter for punctuation words
   * @param hf HeadFinder to use when building it
   */
  public EnglishGrammaticalStructure(Tree t, Predicate<String> puncFilter, HeadFinder hf) {
    // the tree is normalized (for index and functional tag stripping) inside CoordinationTransformer
    super(t, EnglishGrammaticalRelations.values(), EnglishGrammaticalRelations.valuesLock(), new CoordinationTransformer(hf), hf, puncFilter, Filters.acceptFilter());
  }

  /** Used for postprocessing CoNLL X dependencies */
  public EnglishGrammaticalStructure(List<TypedDependency> projectiveDependencies, TreeGraphNode root) {
    super(projectiveDependencies, root);
  }


  /**
   * Returns a Filter which checks dependencies for usefulness as
   * extra tree-based dependencies.  By default, everything is
   * accepted.  One example of how this can be useful is in the
   * English dependencies, where the REL dependency is used as an
   * intermediate and we do not want this to be added when we make a
   * second pass over the trees for missing dependencies.
   */
  @Override
  protected Predicate<TypedDependency> extraTreeDepFilter() {
    return extraTreeDepFilter;
  }

  private static class ExtraTreeDepFilter implements Predicate<TypedDependency>, Serializable {
    @Override
    public boolean test(TypedDependency d) {
      return d != null && d.reln() != RELATIVE;
    }

    private static final long serialVersionUID = 1L;
  }

  private static final Predicate<TypedDependency> extraTreeDepFilter = new ExtraTreeDepFilter();
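
  // Illustrative example (not from the original source): the filter above rejects only the
  // temporary "rel" relation, so when extra tree-based dependencies are gathered in a second
  // pass an intermediate rel(love, who) is skipped, while ordinary relations such as
  // dobj(love, who) or prep(fighting, for) are accepted unchanged.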


  @Override
  protected void correctDependencies(List<TypedDependency> list) {
    if (DEBUG) {
      printListSorted("At correctDependencies:", list);
    }
    correctSubjPass(list);
    if (DEBUG) {
      printListSorted("After correctSubjPass:", list);
    }
    removeExactDuplicates(list);
    if (DEBUG) {
      printListSorted("After removeExactDuplicates:", list);
    }
  }

  private static void printListSorted(String title, Collection<TypedDependency> list) {
    List<TypedDependency> lis = new ArrayList<>(list);
    Collections.sort(lis);
    if (title != null) {
      log.info(title);
    }
    log.info(lis);
  }

  @Override
  protected void postProcessDependencies(List<TypedDependency> list) {
    if (DEBUG) {
      printListSorted("At postProcessDependencies:", list);
    }

    SemanticGraph sg = new SemanticGraph(list);
    correctWHAttachment(sg);
    list.clear();
    list.addAll(sg.typedDependencies());
    if (DEBUG) {
      printListSorted("After correcting WH movement", list);
    }

    convertRel(list);
    if (DEBUG) {
      printListSorted("After converting rel:", list);
    }
  }

  @Override
  protected void getExtras(List<TypedDependency> list) {
    addRef(list);
    if (DEBUG) {
      printListSorted("After adding ref:", list);
    }

    addExtraNSubj(list);
    if (DEBUG) {
      printListSorted("After adding extra nsubj:", list);
    }

    addStrandedPobj(list);
    if (DEBUG) {
      printListSorted("After adding stranded pobj:", list);
    }
  }

  // Using this makes addStrandedPobj a lot cleaner looking, but it
  // makes the converter roughly 2% slower.  Might not be worth it.
  // Similar changes could be made to many of the other complicated
  // collapsing methods.
  // static final SemgrexPattern strandedPobjSemgrex = SemgrexPattern.compile("{}=head >rcmod ({} [ == {}=prepgov | >xcomp {}=prepgov | >conj {}=prepgov ]) : {}=prepgov >prep ({}=prepdep !>pcomp {} !> pobj {})");
  // // Deal with preposition stranding in relative clauses.
  // // For example, "the only thing I'm rooting for"
  // // This method will add pobj(for, thing) by connecting using the rcmod and prep
  // private static void addStrandedPobj(List<TypedDependency> list) {
  //   SemanticGraph graph = new SemanticGraph(list);
  //   SemgrexMatcher matcher = strandedPobjSemgrex.matcher(graph);
  //   while (matcher.find()) {
  //     IndexedWord gov = matcher.getNode("prepdep");
  //     IndexedWord dep = matcher.getNode("head");

  //     TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, gov, dep);
  //     newDep.setExtra();
  //     list.add(newDep);
  //   }
  // }


  // Deal with preposition stranding in relative clauses.
  // For example, "the only thing I'm rooting for"
  // This method will add pobj(for, thing) by connecting using the rcmod and prep
  private static void addStrandedPobj(List<TypedDependency> list) {
    List<IndexedWord> depNodes = null;
    List<TypedDependency> newDeps = null;
    for (TypedDependency rcmod : list) {
      if (rcmod.reln() != RELATIVE_CLAUSE_MODIFIER) {
        continue;
      }

      IndexedWord head = rcmod.gov();
      if (depNodes == null) {
        depNodes = Generics.newArrayList();
      } else {
        depNodes.clear();
      }
      depNodes.add(rcmod.dep());
      for (TypedDependency connected : list) {
        if (connected.gov().equals(rcmod.dep()) && (connected.reln() == XCLAUSAL_COMPLEMENT || connected.reln() == CONJUNCT)) {
          depNodes.add(connected.dep());
        }
      }

      for (IndexedWord dep : depNodes) {
        for (TypedDependency prep : list) {
          if (!prep.gov().equals(dep) || prep.reln() != PREPOSITIONAL_MODIFIER) {
            continue;
          }

          boolean found = false;
          for (TypedDependency other : list) {
            if (other.gov().equals(prep.dep()) && (other.reln() == PREPOSITIONAL_COMPLEMENT || other.reln() == PREPOSITIONAL_OBJECT)) {
              found = true;
              break;
            }
          }
          if (!found) {
            if (newDeps == null) {
              newDeps = Generics.newArrayList();
            }
            TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, prep.dep(), head);
            newDeps.add(newDep);
          }
        }
      }

    }
    if (newDeps != null) {
      list.addAll(newDeps);
    }
  }
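
  // Worked example for addStrandedPobj (illustrative, following the comments above): for
  // "the only thing I'm rooting for", the basic dependencies include
  //   rcmod(thing, rooting)
  //   prep(rooting, for)
  // and "for" has no pobj/pcomp of its own, so the method adds the extra dependency
  //   pobj(for, thing)
  // recording the object of the stranded preposition.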


  /* Used by correctWHAttachment */
  private static final SemgrexPattern XCOMP_PATTERN = SemgrexPattern.compile("{}=root >xcomp {}=embedded >/^(dep|dobj)$/ {}=wh ?>/([di]obj)/ {}=obj");
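
  // Reading of the pattern above (illustrative gloss, not from the original source): match a
  // node "root" that has an xcomp child "embedded", a child "wh" attached by dep or dobj,
  // and, optionally, a further child "obj" attached by a relation matching dobj or iobj.
  // correctWHAttachment() below then decides whether "wh" should be moved under "embedded".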

  /**
   * Tries to correct complicated cases of WH-movement in
   * sentences such as "What does Mary seem to have?" in
   * which "What" should attach to "have" instead of the
   * control verb.
   *
   * @param sg The Semantic graph to operate on.
   */
  private static void correctWHAttachment(SemanticGraph sg) {
    /* Semgrexes require a graph with a root. */
    if (sg.getRoots().isEmpty())
      return;

    SemanticGraph sgCopy = sg.makeSoftCopy();
    SemgrexMatcher matcher = XCOMP_PATTERN.matcher(sgCopy);
    while (matcher.findNextMatchingNode()) {
      IndexedWord root = matcher.getNode("root");
      IndexedWord embeddedVerb = matcher.getNode("embedded");
      IndexedWord wh = matcher.getNode("wh");
      IndexedWord dobj = matcher.getNode("obj");

      /* Check if the object is a WH-word. */
      if (wh.tag().startsWith("W")) {
        boolean reattach = false;
        /* If the control verb already has an object, then
           we have to reattach the WH-word to the verb in the embedded clause. */
        if (dobj != null) {
          reattach = true;
        } else {
          /* If the control verb can't have an object, we also have to reattach. */
          String lemma = Morphology.lemmaStatic(root.value(), root.tag());
          if (lemma.matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX)) {
            reattach = true;
          }
        }

        if (reattach) {
          SemanticGraphEdge edge = sg.getEdge(root, wh);
          if (edge != null) {
            sg.removeEdge(edge);
            sg.addEdge(embeddedVerb, wh, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
          }
        }
      }
    }
  }
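
  // Worked example for correctWHAttachment (illustrative): in "What does Mary seem to have?",
  // the parser may initially attach the WH-word to the control verb, e.g.
  //   dep(seem, What) alongside xcomp(seem, have)
  // Since "seem" cannot take "What" as its own object, the edge from "seem" to "What" is
  // removed and dobj(have, What) is added, attaching the WH-word to the embedded verb.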

  /**
   * What we do in this method is look for temporary dependencies of
   * the type "rel".  These occur in sentences such as "I saw the man
   * who you love".  In that case, we should produce dobj(love, who).
   * On the other hand, in the sentence "... which Mr. Bush was
   * fighting for", we should have pobj(for, which).
   */
  private static void convertRel(List<TypedDependency> list) {
    List<TypedDependency> newDeps = new ArrayList<>();
    for (TypedDependency rel : list) {
      if (rel.reln() != RELATIVE) {
        continue;
      }

      boolean foundPrep = false;
      for (TypedDependency prep : list) {

        // todo: It would also be good to add a rule here to prefer ccomp nsubj over dobj if there is a ccomp with no subj
        // then we could get right: Which eco-friendly options do you think there will be on the new Lexus?

        if (prep.reln() != PREPOSITIONAL_MODIFIER) {
          continue;
        }
        if (!prep.gov().equals(rel.gov())) {

          //Try to find a clausal complement with a preposition without an
          //object. For sentences such as "What am I good at?"
          boolean hasCompParent = false;
          for (TypedDependency prep2 : list) {
            if (prep2.reln() == XCLAUSAL_COMPLEMENT
                || prep2.reln() == ADJECTIVAL_COMPLEMENT
                || prep2.reln() == CLAUSAL_COMPLEMENT
                || prep2.reln() == ROOT) {
              if (prep.gov().equals(prep2.dep()) && prep2.gov().equals(rel.gov())) {
                hasCompParent = true;
                break;
              }
            }
          }

          if ( ! hasCompParent)
            continue;
        }

        // at this point, we have two dependencies as in the Mr. Bush
        // example.  it should be rel(fighting, which) and
        // prep(fighting, for).  We now look to see if there is a
        // corresponding pobj associated with the dependent of the
        // prep relation.  If not, we will connect the dep of the prep
        // relation and the head of the rel relation.  Otherwise, the
        // original rel relation will become a dobj.
        boolean foundPobj = false;
        for (TypedDependency pobj : list) {
          if (pobj.reln() != PREPOSITIONAL_OBJECT && pobj.reln() != PREPOSITIONAL_COMPLEMENT) {
            continue;
          }
          if (!pobj.gov().equals(prep.dep())) {
            continue;
          }
          // we did find a pobj/pcomp, so it is not necessary to
          // change this rel.
          foundPobj = true;
          break;
        }

        if (!foundPobj) {
          foundPrep = true;
          TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, prep.dep(), rel.dep());
          newDeps.add(newDep);
          rel.setReln(KILL);
          // break; // only put it in one place (or do we want to allow across-the-board effects?)
        }
      }
      if (!foundPrep) {
        rel.setReln(DIRECT_OBJECT);
      }
    }

    filterKill(list);
    for (TypedDependency dep : newDeps) {
      if (!list.contains(dep)) {
        list.add(dep);
      }
    }
  }
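
  // Worked examples for convertRel (illustrative, following the javadoc above):
  //   "I saw the man who you love": rel(love, who) with no matching prep becomes dobj(love, who).
  //   "... which Mr. Bush was fighting for": rel(fighting, which) plus prep(fighting, for), where
  //   "for" has no pobj/pcomp, yields pobj(for, which); the original rel is marked KILL and
  //   removed by filterKill below.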

  /**
   * Alters a list in place by removing all the KILL relations
   */
  private static void filterKill(Collection<TypedDependency> deps) {
    List<TypedDependency> filtered = Generics.newArrayList();
    for (TypedDependency dep : deps) {
      if (dep.reln() != KILL) {
        filtered.add(dep);
      }
    }
    deps.clear();
    deps.addAll(filtered);
  }


  /**
   * Destructively modifies this {@code Collection<TypedDependency>}
   * by collapsing several types of transitive pairs of dependencies.
   * If called with a tree of dependencies and both CCprocess and
   * includeExtras set to false, then the tree structure is preserved.
   * <dl>
   * <dt>prepositional object dependencies: pobj</dt>
   * <dd>
   * {@code prep(cat, in)} and {@code pobj(in, hat)} are collapsed to
   * {@code prep_in(cat, hat)}</dd>
   * <dt>prepositional complement dependencies: pcomp</dt>
   * <dd>
   * {@code prep(heard, of)} and {@code pcomp(of, attacking)} are
   * collapsed to {@code prepc_of(heard, attacking)}</dd>
   * <dt>conjunct dependencies</dt>
   * <dd>
   * {@code cc(investors, and)} and
   * {@code conj(investors, regulators)} are collapsed to
   * {@code conj_and(investors,regulators)}</dd>
   * <dt>possessive dependencies: possessive</dt>
   * <dd>
   * {@code possessive(Montezuma, 's)} will be erased. This is like a collapsing, but
   * due to the flatness of NPs, two dependencies are not actually composed.</dd>
   * <dt>For relative clauses, it will collapse referent</dt>
   * <dd>
   * {@code ref(man, that)} and {@code dobj(love, that)} are collapsed
   * to {@code dobj(love, man)}</dd>
   * </dl>
   */
  @Override
  protected void collapseDependencies(List<TypedDependency> list, boolean CCprocess, Extras includeExtras) {
    if (DEBUG) {
      printListSorted("collapseDependencies: CCproc: " + CCprocess + " includeExtras: " + includeExtras, list);
    }
    correctDependencies(list);
    if (DEBUG) {
      printListSorted("After correctDependencies:", list);
    }
    eraseMultiConj(list);
    if (DEBUG) {
      printListSorted("After collapse multi conj:", list);
    }
    collapse2WP(list);
    if (DEBUG) {
      printListSorted("After collapse2WP:", list);
    }
    collapseFlatMWP(list);
    if (DEBUG) {
      printListSorted("After collapseFlatMWP:", list);
    }
    collapse2WPbis(list);
    if (DEBUG) {
      printListSorted("After collapse2WPbis:", list);
    }
    collapse3WP(list);
    if (DEBUG) {
      printListSorted("After collapse3WP:", list);
    }
    collapsePrepAndPoss(list);
    if (DEBUG) {
      printListSorted("After PrepAndPoss:", list);
    }
    collapseConj(list);
    if (DEBUG) {
      printListSorted("After conj:", list);
    }
    if (includeExtras.doRef) {
      addRef(list);
      if (DEBUG) {
        printListSorted("After adding ref:", list);
      }
      if (includeExtras.collapseRef) {
        collapseReferent(list);
        if (DEBUG) {
          printListSorted("After collapse referent:", list);
        }
      }
    }
    if (CCprocess) {
      treatCC(list);
      if (DEBUG) {
        printListSorted("After treatCC:", list);
      }
    }
    if (includeExtras.doSubj) {
      addExtraNSubj(list);
      if (DEBUG) {
        printListSorted("After adding extra nsubj:", list);
      }
      correctSubjPass(list);
      if (DEBUG) {
        printListSorted("After correctSubjPass:", list);
      }
    }
    removeDep(list);
    if (DEBUG) {
      printListSorted("After remove dep:", list);
    }
    Collections.sort(list);
    if (DEBUG) {
      printListSorted("After all collapse:", list);
    }
  }

  @Override
  protected void collapseDependenciesTree(List<TypedDependency> list) {
    collapseDependencies(list, false, Extras.NONE);
  }

  /**
   * Does some hard coding to deal with relation in CONJP. For now we deal with:
   * but not, if not, instead of, rather than, but rather GO TO negcc
* as well as, not to mention, but also, & GO TO and. * * @param conj The head dependency of the conjunction marker * @return A GrammaticalRelation made from a normalized form of that * conjunction. */ private static GrammaticalRelation conjValue(String conj) { String newConj = conj.toLowerCase(); if (newConj.equals("not") || newConj.equals("instead") || newConj.equals("rather")) { newConj = "negcc"; } else if (newConj.equals("mention") || newConj.equals("to") || newConj.equals("also") || newConj.contains("well") || newConj.equals("&")) { newConj = "and"; } return EnglishGrammaticalRelations.getConj(newConj); } private static void treatCC(Collection list) { // Construct a map from tree nodes to the set of typed // dependencies in which the node appears as dependent. Map> map = Generics.newHashMap(); // Construct a map of tree nodes being governor of a subject grammatical // relation to that relation Map subjectMap = Generics.newHashMap(); // Construct a set of TreeGraphNodes with a passive auxiliary on them Set withPassiveAuxiliary = Generics.newHashSet(); // Construct a map of tree nodes being governor of an object grammatical // relation to that relation // Map objectMap = new // HashMap(); List rcmodHeads = Generics.newArrayList(); List prepcDep = Generics.newArrayList(); for (TypedDependency typedDep : list) { if (!map.containsKey(typedDep.dep())) { // NB: Here and in other places below, we use a TreeSet (which extends // SortedSet) to guarantee that results are deterministic) map.put(typedDep.dep(), new TreeSet<>()); } map.get(typedDep.dep()).add(typedDep); if (typedDep.reln().equals(AUX_PASSIVE_MODIFIER)) { withPassiveAuxiliary.add(typedDep.gov()); } // look for subjects if (typedDep.reln().getParent() == NOMINAL_SUBJECT || typedDep.reln().getParent() == SUBJECT || typedDep.reln().getParent() == CLAUSAL_SUBJECT) { if (!subjectMap.containsKey(typedDep.gov())) { subjectMap.put(typedDep.gov(), typedDep); } } // look for objects // this map was only required by the code commented out below, so comment // it out too // if (typedDep.reln() == DIRECT_OBJECT) { // if (!objectMap.containsKey(typedDep.gov())) { // objectMap.put(typedDep.gov(), typedDep); // } // } // look for rcmod relations if (typedDep.reln() == RELATIVE_CLAUSE_MODIFIER) { rcmodHeads.add(typedDep.gov()); } // look for prepc relations: put the dependent of such a relation in the // list // to avoid wrong propagation of dobj if (typedDep.reln().toString().startsWith("prepc")) { prepcDep.add(typedDep.dep()); } } // log.info(map); // if (DEBUG) log.info("Subject map: " + subjectMap); // if (DEBUG) log.info("Object map: " + objectMap); // log.info(rcmodHeads); // create a new list of typed dependencies Collection newTypedDeps = new ArrayList<>(list); // find typed deps of form conj(gov,dep) for (TypedDependency td : list) { if (EnglishGrammaticalRelations.getConjs().contains(td.reln())) { IndexedWord gov = td.gov(); IndexedWord dep = td.dep(); // look at the dep in the conjunct Set gov_relations = map.get(gov); // log.info("gov " + gov); if (gov_relations != null) { for (TypedDependency td1 : gov_relations) { // log.info("gov rel " + td1); IndexedWord newGov = td1.gov(); // in the case of errors in the basic dependencies, it // is possible to have overlapping newGov & dep if (newGov.equals(dep)) { continue; } GrammaticalRelation newRel = td1.reln(); if (newRel != ROOT) { if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) { // to prevent wrong propagation in the case of long dependencies in relative clauses if (newRel != 
DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) { if (DEBUG) { log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)"); } newTypedDeps.add(new TypedDependency(newRel, newGov, dep)); } } else { if (DEBUG) { log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep); } newTypedDeps.add(new TypedDependency(newRel, newGov, dep)); } } } } // propagate subjects // look at the gov in the conjunct: if it is has a subject relation, // the dep is a verb and the dep doesn't have a subject relation // then we want to add a subject relation for the dep. // (By testing for the dep to be a verb, we are going to miss subject of // copula verbs! but // is it safe to relax this assumption?? i.e., just test for the subject // part) // CDM 2008: I also added in JJ, since participial verbs are often // tagged JJ String tag = dep.tag(); if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && ! subjectMap.containsKey(dep)) { TypedDependency tdsubj = subjectMap.get(gov); // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then // add nsubj (if it is tagged correctly, should do this for VBD too, but we don't) GrammaticalRelation relation = tdsubj.reln(); if (relation == NOMINAL_PASSIVE_SUBJECT) { if (isDefinitelyActive(tag)) { relation = NOMINAL_SUBJECT; } } else if (relation == CLAUSAL_PASSIVE_SUBJECT) { if (isDefinitelyActive(tag)) { relation = CLAUSAL_SUBJECT; } } else if (relation == NOMINAL_SUBJECT) { if (withPassiveAuxiliary.contains(dep)) { relation = NOMINAL_PASSIVE_SUBJECT; } } else if (relation == CLAUSAL_SUBJECT) { if (withPassiveAuxiliary.contains(dep)) { relation = CLAUSAL_PASSIVE_SUBJECT; } } if (DEBUG) { log.info("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.dep() + " (subj propagation case)"); } newTypedDeps.add(new TypedDependency(relation, dep, tdsubj.dep())); } // propagate objects // cdm july 2010: This bit of code would copy a dobj from the first // clause to a later conjoined clause if it didn't // contain its own dobj or prepc. But this is too aggressive and wrong // if the later clause is intransitive // (including passivized cases) and so I think we have to not have this // done always, and see no good "sometimes" heuristic. // IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp // (SBAR). // if (objectMap.containsKey(gov) && // dep.tag().startsWith("VB") && ! objectMap.containsKey(dep) // && ! prepcDep.contains(gov)) { // TypedDependency tdobj = objectMap.get(gov); // if (DEBUG) { // log.info("Adding new " + tdobj.reln() + " dependency from " // + dep + " to " + tdobj.dep() + " (obj propagation case)"); // } // newTypedDeps.add(new TypedDependency(tdobj.reln(), dep, // tdobj.dep())); // } } } list.clear(); list.addAll(newTypedDeps); } private static boolean isDefinitelyActive(String tag) { // we should include VBD, but don't as it is often a tagging mistake. return tag.equals("VB") || tag.equals("VBZ") || tag.equals("VBP") || tag.startsWith("JJ"); } /** * This rewrites the "conj" relation to "conj_word" and deletes cases of the * "cc" relation providing this rewrite has occurred (but not if there is only * something like a clause-initial and). For instance, cc(elected-5, and-9) * conj(elected-5, re-elected-11) becomes conj_and(elected-5, re-elected-11) * * @param list List of dependencies. 
*/ private static void collapseConj(Collection list) { List govs = Generics.newArrayList(); // find typed deps of form cc(gov, dep) for (TypedDependency td : list) { if (td.reln() == COORDINATION) { // i.e. "cc" IndexedWord gov = td.gov(); GrammaticalRelation conj = conjValue(td.dep().value()); if (DEBUG) { log.info("Set conj to " + conj + " based on " + td); } // find other deps of that gov having reln "conj" boolean foundOne = false; for (TypedDependency td1 : list) { if (td1.gov().equals(gov)) { if (td1.reln() == CONJUNCT) { // i.e., "conj" // change "conj" to the actual (lexical) conjunction if (DEBUG) { log.info("Changing " + td1 + " to have relation " + conj); } td1.setReln(conj); foundOne = true; } else if (td1.reln() == COORDINATION) { conj = conjValue(td1.dep().value()); if (DEBUG) { log.info("Set conj to " + conj + " based on " + td1); } } } } // register to remove cc from this governor if (foundOne) { govs.add(gov); } } } // now remove typed dependencies with reln "cc" if we have successfully // collapsed list.removeIf(td2 -> td2.reln() == COORDINATION && govs.contains(td2.gov())); } /** * This method will collapse a referent relation such as follows. e.g.: * "The man that I love … " ref(man, that) dobj(love, that) -> * dobj(love, man) */ private static void collapseReferent(Collection list) { // find typed deps of form ref(gov, dep) // put them in a List for processing; remove them from the set of deps List refs = new ArrayList<>(); for (Iterator iter = list.iterator(); iter.hasNext();) { TypedDependency td = iter.next(); if (td.reln() == REFERENT) { refs.add(td); iter.remove(); } } // now substitute target of referent where possible for (TypedDependency ref : refs) { IndexedWord dep = ref.dep();// take the relative word IndexedWord ant = ref.gov();// take the antecedent for (TypedDependency td : list) { // the last condition below maybe shouldn't be necessary, but it has // helped stop things going haywire a couple of times (it stops the // creation of a unit cycle that probably leaves something else // disconnected) [cdm Jan 2010] if (td.dep().equals(dep) && td.reln() != REFERENT && !td.gov().equals(ant)) { if (DEBUG) { log.info("referent: changing " + td); } td.setDep(ant); td.setExtra(); if (DEBUG) { log.info(" to " + td); } } } } } /** * Look for ref rules for a given word. We look through the * children and grandchildren of the rcmod dependency, and if any * children or grandchildren depend on a that/what/which/etc word, * we take the leftmost that/what/which/etc word as the dependent * for the ref TypedDependency. 
*/ private static void addRef(Collection list) { List newDeps = new ArrayList<>(); for (TypedDependency rcmod : list) { if (rcmod.reln() != RELATIVE_CLAUSE_MODIFIER) { // we only add ref dependencies across relative clauses continue; } IndexedWord head = rcmod.gov(); IndexedWord modifier = rcmod.dep(); TypedDependency leftChild = null; for (TypedDependency child : list) { if (child.gov().equals(modifier) && EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(child.dep().value()).matches() && (leftChild == null || child.dep().index() < leftChild.dep().index())) { leftChild = child; } } // TODO: could be made more efficient TypedDependency leftGrandchild = null; for (TypedDependency child : list) { if (!child.gov().equals(modifier)) { continue; } for (TypedDependency grandchild : list) { if (grandchild.gov().equals(child.dep()) && EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(grandchild.dep().value()).matches() && (leftGrandchild == null || grandchild.dep().index() < leftGrandchild.dep().index())) { leftGrandchild = grandchild; } } } TypedDependency newDep = null; if (leftGrandchild != null && (leftChild == null || leftGrandchild.dep().index() < leftChild.dep().index())) { newDep = new TypedDependency(REFERENT, head, leftGrandchild.dep()); } else if (leftChild != null) { newDep = new TypedDependency(REFERENT, head, leftChild.dep()); } if (newDep != null) { newDeps.add(newDep); } } for (TypedDependency newDep : newDeps) { if (!list.contains(newDep)) { newDep.setExtra(); list.add(newDep); } } } /** * Add extra nsubj dependencies when collapsing basic dependencies. *
* In the general case, we look for an aux modifier under an xcomp * modifier, and assuming there aren't already associated nsubj * dependencies as daughters of the original xcomp dependency, we * add nsubj dependencies for each nsubj daughter of the aux. *
* There is also a special case for "to" words, in which case we add * a dependency if and only if there is no nsubj associated with the * xcomp and there is no other aux dependency. This accounts for * sentences such as "he decided not to" with no following verb. */ private static void addExtraNSubj(Collection list) { List newDeps = new ArrayList<>(); for (TypedDependency xcomp : list) { if (xcomp.reln() != XCLAUSAL_COMPLEMENT) { // we only add extra nsubj dependencies to some xcomp dependencies continue; } IndexedWord modifier = xcomp.dep(); IndexedWord head = xcomp.gov(); boolean hasSubjectDaughter = false; boolean hasAux = false; List subjects = Generics.newArrayList(); List objects = Generics.newArrayList(); for (TypedDependency dep : list) { // already have a subject dependency if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov().equals(modifier)) { hasSubjectDaughter = true; break; } if (dep.reln() == AUX_MODIFIER && dep.gov().equals(modifier)) { hasAux = true; } if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov().equals(head)) { subjects.add(dep.dep()); } if (dep.reln() == DIRECT_OBJECT && dep.gov().equals(head)) { objects.add(dep.dep()); } } // if we already have an nsubj dependency, no need to add an extra nsubj if (hasSubjectDaughter) { continue; } if ((modifier.value().equalsIgnoreCase("to") && hasAux) || (!modifier.value().equalsIgnoreCase("to") && !hasAux)) { continue; } // In general, we find that the objects of the verb are better // for extra nsubj than the original nsubj of the verb. For example, // "Many investors wrote asking the SEC to require ..." // There is no nsubj of asking, but the dobj, SEC, is the extra nsubj of require. // Similarly, "The law tells them when to do so" // Instead of nsubj(do, law) we want nsubj(do, them) if (objects.size() > 0) { for (IndexedWord object : objects) { TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, object); newDeps.add(newDep); } } else { for (IndexedWord subject : subjects) { TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, subject); newDeps.add(newDep); } } } for (TypedDependency newDep : newDeps) { if (!list.contains(newDep)) { newDep.setExtra(); list.add(newDep); } } } /** * This method corrects subjects of verbs for which we identified an auxpass, * but didn't identify the subject as passive. * * @param list List of typedDependencies to work on */ private static void correctSubjPass(Collection list) { // put in a list verbs having an auxpass List list_auxpass = new ArrayList<>(); for (TypedDependency td : list) { if (td.reln() == AUX_PASSIVE_MODIFIER) { list_auxpass.add(td.gov()); } } for (TypedDependency td : list) { // correct nsubj if (td.reln() == NOMINAL_SUBJECT && list_auxpass.contains(td.gov())) { // log.info("%%% Changing subj to passive: " + td); td.setReln(NOMINAL_PASSIVE_SUBJECT); } if (td.reln() == CLAUSAL_SUBJECT && list_auxpass.contains(td.gov())) { // log.info("%%% Changing subj to passive: " + td); td.setReln(CLAUSAL_PASSIVE_SUBJECT); } // correct unretrieved poss: dep relation in which the dependent is a // PRP$ or WP$ // cdm: Now done in basic rules. The only cases that this still matches // are (1) tagging mistakes where PRP in dobj position is mistagged PRP$ // or a couple of parsing errors where the dependency is wrong anyway, so // it's probably okay to keep it a dep. So I'm disabling this. 
// String tag = td.dep().tag(); // if (td.reln() == DEPENDENT && (tag.equals("PRP$") || tag.equals("WP$"))) { // log.info("%%% Unrecognized basic possessive pronoun: " + td); // td.setReln(POSSESSION_MODIFIER); // } } } private static boolean inConjDeps(TypedDependency td, List> conjs) { for (Triple trip : conjs) { if (td.equals(trip.first())) { return true; } } return false; } private static void collapsePrepAndPoss(Collection list) { // Man oh man, how gnarly is the logic of this method.... Collection newTypedDeps = new ArrayList<>(); // Construct a map from tree nodes to the set of typed // dependencies in which the node appears as governor. // cdm: could use CollectionValuedMap here! Map> map = Generics.newHashMap(); List vmod = Generics.newArrayList(); for (TypedDependency typedDep : list) { if (!map.containsKey(typedDep.gov())) { map.put(typedDep.gov(), new TreeSet<>()); } map.get(typedDep.gov()).add(typedDep); if (typedDep.reln() == VERBAL_MODIFIER) { // look for aux deps which indicate this was a to-be verb boolean foundAux = false; for (TypedDependency auxDep : list) { if (auxDep.reln() != AUX_MODIFIER) { continue; } if (!auxDep.gov().equals(typedDep.dep()) || !auxDep.dep().value().equalsIgnoreCase("to")) { continue; } foundAux = true; break; } if (!foundAux) { vmod.add(typedDep.dep()); } } } // log.info("here's the vmod list: " + vmod); // Do preposition conjunction interaction for // governor p NP and p NP case ... a lot of special code cdm jan 2006 for (TypedDependency td1 : list) { if (td1.reln() != PREPOSITIONAL_MODIFIER) { continue; } IndexedWord td1Dep = td1.dep(); SortedSet possibles = map.get(td1Dep); if (possibles == null) { continue; } // look for the "second half" // unique: the head prep and whether it should be pobj Pair prepDep = null; TypedDependency ccDep = null; // treat as unique // list of dep and prepOtherDep and pobj (or pcomp) List> conjs = new ArrayList<>(); Set otherDtrs = new TreeSet<>(); // first look for a conj(prep, prep) (there might be several conj relations!!!) boolean samePrepositionInEachConjunct = true; int conjIndex = -1; for (TypedDependency td2 : possibles) { if (td2.reln() == CONJUNCT) { IndexedWord td2Dep = td2.dep(); String td2DepPOS = td2Dep.tag(); if (td2DepPOS.equals("IN") || td2DepPOS.equals("TO")) { samePrepositionInEachConjunct = samePrepositionInEachConjunct && td2Dep.value().equals(td1Dep.value()); Set possibles2 = map.get(td2Dep); boolean pobj = true;// default of collapsing preposition is prep_ TypedDependency prepOtherDep = null; if (possibles2 != null) { for (TypedDependency td3 : possibles2) { IndexedWord td3Dep = td3.dep(); String td3DepPOS = td3Dep.tag(); // CDM Mar 2006: I put in disjunction here when I added in // PREPOSITIONAL_OBJECT. 
If it catches all cases, we should // be able to delete the DEPENDENT disjunct // maybe better to delete the DEPENDENT disjunct - it creates // problem with multiple prep (mcdm) if ((td3.reln() == PREPOSITIONAL_OBJECT || td3.reln() == PREPOSITIONAL_COMPLEMENT) && (!(td3DepPOS.equals("IN") || td3DepPOS.equals("TO"))) && prepOtherDep == null) { prepOtherDep = td3; if (td3.reln() == PREPOSITIONAL_COMPLEMENT) { pobj = false; } } else { otherDtrs.add(td3); } } } if (conjIndex < td2Dep.index()) { conjIndex = td2Dep.index(); } conjs.add(new Triple<>(td2, prepOtherDep, pobj)); } } } // end td2:possibles if (conjs.isEmpty()) { continue; } // if we have a conj under a preposition dependency, we look for the other // parts String td1DepPOS = td1Dep.tag(); for (TypedDependency td2 : possibles) { // we look for the cc linked to this conjDep // the cc dep must have an index smaller than the dep of conjDep if (td2.reln() == COORDINATION && td2.dep().index() < conjIndex) { ccDep = td2; } else { IndexedWord td2Dep = td2.dep(); String td2DepPOS = td2Dep.tag(); // log.info("prepDep find: td1.reln: " + td1.reln() + // "; td2.reln: " + td2.reln() + "; td1DepPos: " + td1DepPOS + // "; td2DepPos: " + td2DepPOS + "; index " + index + // "; td2.dep().index(): " + td2.dep().index()); if ((td2.reln() == DEPENDENT || td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && prepDep == null && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO")))) { // same index trick, in case we have multiple deps // I deleted this to see if it helped [cdm Jan 2010] && // td2.dep().index() < index) prepDep = new Pair<>(td2, td2.reln() != PREPOSITIONAL_COMPLEMENT); } else if (!inConjDeps(td2, conjs)) {// don't want to add the conjDep // again! otherDtrs.add(td2); } } } if (prepDep == null || ccDep == null) { continue; // we can't deal with it in the hairy prep/conj interaction case! } if (DEBUG) { // ccDep must be non-null given test above log.info("!! Conj and prep case:"); log.info(" td1 (prep): " + td1); log.info(" Kids of td1 are: " + possibles); log.info(" prepDep: " + prepDep); log.info(" ccDep: " + ccDep); log.info(" conjs: " + conjs); log.info(" samePrepositionInEachConjunct: " + samePrepositionInEachConjunct); log.info(" otherDtrs: " + otherDtrs); } // check if we have the same prepositions in the conjunction if (samePrepositionInEachConjunct) { // conjDep != null && prepOtherDep != // null && // OK, we have a conjunction over parallel PPs: Fred flew to Greece and // to Serbia. GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second()); TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep()); newTypedDeps.add(tdNew); if (DEBUG) { log.info("PrepPoss Conj branch (two parallel PPs) adding: " + tdNew); log.info(" removing: " + td1 + " " + prepDep + " " + ccDep); } td1.setReln(KILL);// remember these are "used up" prepDep.first().setReln(KILL); ccDep.setReln(KILL); for (Triple trip : conjs) { TypedDependency conjDep = trip.first(); TypedDependency prepOtherDep = trip.second(); if (prepOtherDep == null) { // CDM July 2010: I think this should only ever happen if there is a // misparse, but it has happened in such circumstances. You have // something like (PP in or in (NP Serbia)), with the two // prepositions the same. We just clean up the mess. 
if (DEBUG) { log.info(" apparent misparse: same P twice with only one NP object (prepOtherDep is null)"); log.info(" removing: " + conjDep); } ccDep.setReln(KILL); } else { TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), prepDep.first().dep(), prepOtherDep.dep()); newTypedDeps.add(tdNew2); if (DEBUG) { log.info(" adding: " + tdNew2); log.info(" removing: " + conjDep + " " + prepOtherDep); } prepOtherDep.setReln(KILL); } conjDep.setReln(KILL); } // promote dtrs that would be orphaned for (TypedDependency otd : otherDtrs) { if (DEBUG) { log.info("Changed " + otd); } otd.setGov(td1.gov()); if (DEBUG) { log.info(" to " + otd); } } // Now we need to see if there are any TDs that will be "orphaned" // by this collapse. Example: if we have: // dep(drew, on) // dep(on, book) // dep(on, right) // the first two will be collapsed to on(drew, book), but then // the third one will be orphaned, since its governor no // longer appears. So, change its governor to 'drew'. // CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now // it does, since they're not automatically deleted // Some things in possibles may have already been changed, so check gov if (DEBUG) { log.info("td1: " + td1 + "; possibles: " + possibles); } for (TypedDependency td2 : possibles) { // if (DEBUG) { // log.info("[a] td2.reln " + td2.reln() + " td2.gov " + // td2.gov() + " td1.dep " + td1.dep()); // } if (td2.reln() != KILL && td2.gov().equals(td1.dep())) { // && td2.reln() // != COORDINATION // && td2.reln() // != CONJUNCT if (DEBUG) { log.info("Changing " + td2 + " to have governor of " + td1 + " [a]"); } td2.setGov(td1.gov()); } } continue; // This one has been dealt with successfully } // end same prepositions // case of "Lufthansa flies to and from Serbia". Make it look like next // case :-) // that is, the prepOtherDep should be the same as prepDep ! 
for (Triple trip : conjs) { if (trip.first() != null && trip.second() == null) { trip.setSecond(new TypedDependency(prepDep.first().reln(), trip.first().dep(), prepDep.first().dep())); trip.setThird(prepDep.second()); } } // we have two different prepositions in the conjunction // in this case we need to add a node // "Bill jumped over the fence and through the hoop" // prep_over(jumped, fence) // conj_and(jumped, jumped) // prep_through(jumped, hoop) // Extra complication: // If "jumped" is already part of a conjunction, we should add the new one off that rather than chaining IndexedWord conjHead = td1.gov(); for (TypedDependency td3 : list) { if (td3.dep().equals(td1.gov()) && td3.reln().equals(CONJUNCT)) { conjHead = td3.gov(); } } GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second()); TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep()); newTypedDeps.add(tdNew); if (DEBUG) { log.info("ConjPP (different preps) adding: " + tdNew); log.info(" deleting: " + td1 + " " + prepDep.first() + " " + ccDep); } td1.setReln(KILL);// remember these are "used up" prepDep.first().setReln(KILL); ccDep.setReln(KILL); // so far we added the first prep grammatical relation int copyNumber = 1; for (Triple trip : conjs) { TypedDependency conjDep = trip.first(); TypedDependency prepOtherDep = trip.second(); boolean pobj = trip.third(); // OK, we have a conjunction over different PPs // we create a new node; // in order to make a distinction between the original node and its copy // we set the "copyCount" variable in the IndexedWord // existence of copyCount > 0 is checked at printing (toString method of // TypedDependency) IndexedWord label = td1.gov().makeSoftCopy(copyNumber); copyNumber++; // now we add the conjunction relation between conjHead (either td1.gov // or what it is itself conjoined with) and the copy // the copy has the same label as td1.gov() but is another TreeGraphNode // todo: Or that's the plan; there are a couple of knock on changes to fix before we can do this! // TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), conjHead, label); TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), td1.gov(), label); newTypedDeps.add(tdNew2); // now we still need to add the second prep grammatical relation // between the copy and the dependent of the prepOtherDep node TypedDependency tdNew3; GrammaticalRelation reln2 = determinePrepRelation(map, vmod, conjDep, td1, pobj); tdNew3 = new TypedDependency(reln2, label, prepOtherDep.dep()); newTypedDeps.add(tdNew3); if (DEBUG) { log.info(" adding: " + tdNew2 + " " + tdNew3); log.info(" deleting: " + conjDep + " " + prepOtherDep); } conjDep.setReln(KILL); prepOtherDep.setReln(KILL); // promote dtrs that would be orphaned for (TypedDependency otd : otherDtrs) { // special treatment for prepositions: the original relation is // likely to be a "dep" and we want this to be a "prep" if (otd.dep().tag().equals("IN")) { otd.setReln(PREPOSITIONAL_MODIFIER); } otd.setGov(td1.gov()); } } // Now we need to see if there are any TDs that will be "orphaned" off // the first preposition // by this collapse. Example: if we have: // dep(drew, on) // dep(on, book) // dep(on, right) // the first two will be collapsed to on(drew, book), but then // the third one will be orphaned, since its governor no // longer appears. So, change its governor to 'drew'. 
// CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now // it does, since they're not automatically deleted for (TypedDependency td2 : possibles) { if (td2.reln() != KILL) { // && td2.reln() != COORDINATION && // td2.reln() != CONJUNCT) { if (DEBUG) { log.info("Changing " + td2 + " to have governor of " + td1 + " [b]"); } td2.setGov(td1.gov()); } } // end for different prepositions } // for TypedDependency td1 : list // below here is the single preposition/possessor basic case!! for (TypedDependency td1 : list) { if (td1.reln() == KILL) { continue; } IndexedWord td1Dep = td1.dep(); String td1DepPOS = td1Dep.tag(); // find all other typedDeps having our dep as gov Set possibles = map.get(td1Dep); if (possibles != null && (td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) { // look for the "second half" boolean pobj = true;// default for prep relation is prep_ for (TypedDependency td2 : possibles) { if (td2.reln() != COORDINATION && td2.reln() != CONJUNCT) { IndexedWord td2Dep = td2.dep(); String td2DepPOS = td2Dep.tag(); if ((td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) { if (td2.reln() == POSSESSIVE_MODIFIER) { if ( ! map.containsKey(td2Dep)) { // if 's has no kids of its own (it shouldn't!) td2.setReln(KILL); } } } else if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO"))) && !isConjWithNoPrep(td2.gov(), possibles)) { // we don't collapse preposition conjoined with a non-preposition // to avoid disconnected constituents // OK, we have a pair td1, td2 to collapse to td3 if (DEBUG) { log.info("(Single prep/poss base case collapsing " + td1 + " and " + td2); } // check whether we are in a pcomp case: if (td2.reln() == PREPOSITIONAL_COMPLEMENT) { pobj = false; } GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, pobj); TypedDependency td3 = new TypedDependency(reln, td1.gov(), td2.dep()); if (DEBUG) { log.info("PP adding: " + td3 + " deleting: " + td1 + ' ' + td2); } // add it to map to deal with recursive cases like "achieved this (PP (PP in part) with talent)" map.get(td3.gov()).add(td3); newTypedDeps.add(td3); td1.setReln(KILL);// remember these are "used up" td2.setReln(KILL);// remember these are "used up" } } } // for TypedDependency td2 } // Now we need to see if there are any TDs that will be "orphaned" // by this collapse. Example: if we have: // dep(drew, on) // dep(on, book) // dep(on, right) // the first two will be collapsed to on(drew, book), but then // the third one will be orphaned, since its governor no // longer appears. So, change its governor to 'drew'. // CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now // it does, since they're not automatically deleted if (possibles != null && td1.reln() == KILL) { for (TypedDependency td2 : possibles) { if (td2.reln() != KILL) { // && td2.reln() != COORDINATION && // td2.reln() != CONJUNCT) { if (DEBUG) { log.info("Changing " + td2 + " to have governor of " + td1 + " [c]"); } td2.setGov(td1.gov()); } } } } // for TypedDependency td1 // now remove typed dependencies with reln "kill" and add new ones. 
for (Iterator iter = list.iterator(); iter.hasNext();) { TypedDependency td = iter.next(); if (td.reln() == KILL) { if (DEBUG) { log.info("Removing dep killed in poss/prep (conj) collapse: " + td); } iter.remove(); } } list.addAll(newTypedDeps); } // end collapsePrepAndPoss() /** Work out prep relation name. pc is the dependency whose dep() is the * preposition to do a name for. topPrep may be the same or different. * Among the daughters of its gov is where to look for an auxpass. */ private static GrammaticalRelation determinePrepRelation(Map> map, List vmod, TypedDependency pc, TypedDependency topPrep, boolean pobj) { // handling the case of an "agent": // the governor of a "by" preposition must have an "auxpass" dependency // or be the dependent of a "vmod" relation // if it is the case, the "agent" variable becomes true boolean agent = false; String preposition = pc.dep().value().toLowerCase(); if (preposition.equals("by")) { // look if we have an auxpass Set aux_pass_poss = map.get(topPrep.gov()); if (aux_pass_poss != null) { for (TypedDependency td_pass : aux_pass_poss) { if (td_pass.reln() == AUX_PASSIVE_MODIFIER) { agent = true; } } } // look if we have a vmod if (!vmod.isEmpty() && vmod.contains(topPrep.gov())) { agent = true; } } GrammaticalRelation reln; if (agent) { reln = AGENT; } else { // for prepositions, use the preposition // for pobj: we collapse into "prep"; for pcomp: we collapse into "prepc" if (pobj) { reln = EnglishGrammaticalRelations.getPrep(preposition); } else { reln = EnglishGrammaticalRelations.getPrepC(preposition); } } return reln; } // used by collapse2WP(), collapseFlatMWP(), collapse2WPbis() KEPT IN // ALPHABETICAL ORDER private static final String[][] MULTIWORD_PREPS = { { "according", "to" }, { "across", "from" }, { "ahead", "of" }, { "along", "with" }, { "alongside", "of" }, { "apart", "from" }, { "as", "for" }, { "as", "from" }, { "as", "of" }, { "as", "per" }, { "as", "to" }, { "aside", "from" }, { "away", "from" }, { "based", "on" }, { "because", "of" }, { "close", "by" }, { "close", "to" }, { "contrary", "to" }, { "compared", "to" }, { "compared", "with" }, { "due", "to" }, { "depending", "on" }, { "except", "for" }, { "exclusive", "of" }, { "far", "from" }, { "followed", "by" }, { "inside", "of" }, { "instead", "of" }, { "irrespective", "of" }, { "next", "to" }, { "near", "to" }, { "off", "of" }, { "out", "of" }, { "outside", "of" }, { "owing", "to" }, { "preliminary", "to" }, { "preparatory", "to" }, { "previous", "to" }, { "prior", "to" }, { "pursuant", "to" }, { "regardless", "of" }, { "subsequent", "to" }, { "such", "as" }, { "thanks", "to" }, { "together", "with" } }; // used by collapse3WP() KEPT IN ALPHABETICAL ORDER private static final String[][] THREEWORD_PREPS = { { "by", "means", "of" }, { "in", "accordance", "with" }, { "in", "addition", "to" }, { "in", "case", "of" }, { "in", "front", "of" }, { "in", "lieu", "of" }, { "in", "place", "of" }, { "in", "spite", "of" }, { "on", "account", "of" }, { "on", "behalf", "of" }, { "on", "top", "of" }, { "with", "regard", "to" }, { "with", "respect", "to" } }; /** * Given a list of typedDependencies, returns true if the node "node" is the * governor of a conj relation with a dependent which is not a preposition * * @param node * A node in this GrammaticalStructure * @param list * A list of typedDependencies * @return true If node is the governor of a conj relation in the list with * the dep not being a preposition */ private static boolean isConjWithNoPrep(IndexedWord node, Collection list) { for 
(TypedDependency td : list) { if (td.gov().equals(node) && td.reln() == CONJUNCT) { // we have a conjunct // check the POS of the dependent String tdDepPOS = td.dep().tag(); if (!(tdDepPOS.equals("IN") || tdDepPOS.equals("TO"))) { return true; } } } return false; } /** * Collapse multiword preposition of the following format: * prep|advmod|dep|amod(gov, mwp[0])
   * dep(mpw[0],mwp[1])
   * pobj|pcomp(mwp[1], compl) or pobj|pcomp(mwp[0], compl)
   * -> prep_mwp[0]_mwp[1](gov, compl)
   *
   * prep|advmod|dep|amod(gov, mwp[1])
   * dep(mpw[1],mwp[0])
   * pobj|pcomp(mwp[1], compl) or pobj|pcomp(mwp[0], compl)
   * -> prep_mwp[0]_mwp[1](gov, compl)
   *
* * The collapsing has to be done at once in order to know exactly which node * is the gov and the dep of the multiword preposition. Otherwise this can * lead to problems: removing a non-multiword "to" preposition for example!!! * This method replaces the old "collapsedMultiWordPreps" * * @param list * list of typedDependencies to work on */ private static void collapse2WP(Collection list) { Collection newTypedDeps = new ArrayList<>(); for (String[] mwp : MULTIWORD_PREPS) { // first look for patterns such as: // X(gov, mwp[0]) // Y(mpw[0],mwp[1]) // Z(mwp[1], compl) or Z(mwp[0], compl) // -> prep_mwp[0]_mwp[1](gov, compl) collapseMultiWordPrep(list, newTypedDeps, mwp[0], mwp[1], mwp[0], mwp[1]); // now look for patterns such as: // X(gov, mwp[1]) // Y(mpw[1],mwp[0]) // Z(mwp[1], compl) or Z(mwp[0], compl) // -> prep_mwp[0]_mwp[1](gov, compl) collapseMultiWordPrep(list, newTypedDeps, mwp[0], mwp[1], mwp[1], mwp[0]); } } /** * Collapse multiword preposition of the following format: * prep|advmod|dep|amod(gov, mwp0) dep(mpw0,mwp1) pobj|pcomp(mwp1, compl) or * pobj|pcomp(mwp0, compl) -> prep_mwp0_mwp1(gov, compl) *

* * @param list List of typedDependencies to work on, * @param newTypedDeps List of typedDependencies that we construct * @param str_mwp0 First part of the multiword preposition to construct the collapsed * preposition * @param str_mwp1 Second part of the multiword preposition to construct the * collapsed preposition * @param w_mwp0 First part of the multiword preposition that we look for * @param w_mwp1 Second part of the multiword preposition that we look for */ private static void collapseMultiWordPrep(Collection list, Collection newTypedDeps, String str_mwp0, String str_mwp1, String w_mwp0, String w_mwp1) { // first find the multiword_preposition: dep(mpw[0], mwp[1]) // the two words should be next to another in the sentence (difference of // indexes = 1) IndexedWord mwp0 = null; IndexedWord mwp1 = null; TypedDependency dep = null; for (TypedDependency td : list) { if (td.gov().value().equalsIgnoreCase(w_mwp0) && td.dep().value().equalsIgnoreCase(w_mwp1) && Math.abs(td.gov().index() - td.dep().index()) == 1) { mwp0 = td.gov(); mwp1 = td.dep(); dep = td; } } if (mwp0 == null) { return; } // now search for prep|advmod|dep|amod(gov, mwp0) IndexedWord governor = null; TypedDependency prep = null; for (TypedDependency td1 : list) { if ((td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == ADVERBIAL_MODIFIER || td1.reln() == ADJECTIVAL_MODIFIER || td1.reln() == DEPENDENT || td1.reln() == MULTI_WORD_EXPRESSION) && td1.dep().equals(mwp0)) { // we found prep|advmod|dep|amod(gov, mwp0) prep = td1; governor = prep.gov(); } } if (prep == null) { return; } // search for the complement: pobj|pcomp(mwp1,X) // or for pobj|pcomp(mwp0,X) // There may be more than one in weird constructions; if there are several, // take the one with the LOWEST index! TypedDependency pobj = null; TypedDependency newtd = null; for (TypedDependency td2 : list) { if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td2.gov().equals(mwp1) || td2.gov().equals(mwp0))) { if (pobj == null || pobj.dep().index() > td2.dep().index()) { pobj = td2; // create the new gr relation GrammaticalRelation gr; if (td2.reln() == PREPOSITIONAL_COMPLEMENT) { gr = EnglishGrammaticalRelations.getPrepC(str_mwp0 + '_' + str_mwp1); } else { gr = EnglishGrammaticalRelations.getPrep(str_mwp0 + '_' + str_mwp1); } if (governor != null) { newtd = new TypedDependency(gr, governor, pobj.dep()); } } } } if (pobj == null || newtd == null) { return; } // only if we found the three parts, set to KILL and remove // and add the new one // Necessarily from the above: prep != null, dep != null, pobj != null, newtd != null if (DEBUG) { log.info("Removing " + prep + ", " + dep + ", and " + pobj); log.info(" and adding " + newtd); } prep.setReln(KILL); dep.setReln(KILL); pobj.setReln(KILL); newTypedDeps.add(newtd); // now remove typed dependencies with reln "kill" // and promote possible orphans for (TypedDependency td1 : list) { if (td1.reln() != KILL) { if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1)) { // CDM: Thought of adding this in Jan 2010, but it causes // conflicting relations tmod vs. pobj. Needs more thought // maybe restrict pobj to first NP in PP, and allow tmod for a later // one? 
if (td1.reln() == TEMPORAL_MODIFIER) { // special case when an extra NP-TMP is buried in a PP for // "during the same period last year" td1.setGov(pobj.dep()); } else { td1.setGov(governor); } } if (!newTypedDeps.contains(td1)) { newTypedDeps.add(td1); } } } list.clear(); list.addAll(newTypedDeps); } /** * Collapse multi-words preposition of the following format: advmod|prt(gov, * mwp[0]) prep(gov,mwp[1]) pobj|pcomp(mwp[1], compl) -> * prep_mwp[0]_mwp[1](gov, compl) *

* * @param list * List of typedDependencies to work on */ private static void collapse2WPbis(Collection list) { Collection newTypedDeps = new ArrayList<>(); for (String[] mwp : MULTIWORD_PREPS) { newTypedDeps.clear(); IndexedWord mwp0 = null; IndexedWord mwp1 = null; IndexedWord governor = null; TypedDependency prep = null; TypedDependency dep = null; TypedDependency pobj = null; TypedDependency newtd = null; // first find the first part of the multi_preposition: advmod|prt(gov, mwp[0]) for (TypedDependency td : list) { if (td.dep().value().equalsIgnoreCase(mwp[0]) && (td.reln() == PHRASAL_VERB_PARTICLE || td.reln() == ADVERBIAL_MODIFIER || td.reln() == DEPENDENT || td.reln() == MULTI_WORD_EXPRESSION)) { // we found advmod(gov, mwp0) or prt(gov, mwp0) governor = td.gov(); mwp0 = td.dep(); dep = td; } } // now search for the second part: prep(gov, mwp1) // the two words in the mwp should be next to another in the sentence // (difference of indexes = 1) if (mwp0 == null || governor == null) { continue; } for (TypedDependency td1 : list) { if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td1.dep().index() - mwp0.index()) == 1 && td1.gov().equals(governor)) {// we // found // prep(gov, // mwp1) mwp1 = td1.dep(); prep = td1; } } if (mwp1 == null) { continue; } // search for the complement: pobj|pcomp(mwp1,X) for (TypedDependency td2 : list) { if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(mwp1)) { pobj = td2; // create the new gr relation GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1]); newtd = new TypedDependency(gr, governor, pobj.dep()); } if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(mwp1)) { pobj = td2; // create the new gr relation GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1]); newtd = new TypedDependency(gr, governor, pobj.dep()); } } if (pobj == null) { return; } // only if we found the three parts, set to KILL and remove // and add the new one // now prep != null, pobj != null and newtd != null prep.setReln(KILL); dep.setReln(KILL); pobj.setReln(KILL); newTypedDeps.add(newtd); // now remove typed dependencies with reln "kill" // and promote possible orphans for (TypedDependency td1 : list) { if (td1.reln() != KILL) { if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1)) { td1.setGov(governor); } if (!newTypedDeps.contains(td1)) { newTypedDeps.add(td1); } } } list.clear(); list.addAll(newTypedDeps); } } /** * Collapse 3-word preposition of the following format:
  /**
   * Collapse 3-word prepositions of the following format.
   * This will be the case when the preposition is analyzed as an NP:
   * prep(gov, mwp0)
   * X(mwp0, mwp1)
   * X(mwp1, mwp2)
   * pobj|pcomp(mwp2, compl)
   * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
   *
   * It also takes flat annotation into account:
   * prep(gov, mwp0)
   * X(mwp0, mwp1)
   * X(mwp0, mwp2)
   * pobj|pcomp(mwp0, compl)
   * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
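   *
   * For example (illustrative only; the exact basic dependencies depend on the parse and on
   * the contents of THREEWORD_PREPS): for "He stood in front of the door", an NP-style
   * analysis such as prep(stood, in), pobj(in, front), prep(front, of), pobj(of, door)
   * would be collapsed to prep_in_front_of(stood, door).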
   *
   * @param list List of typedDependencies to work on
   */
  private static void collapse3WP(Collection<TypedDependency> list) {
    Collection<TypedDependency> newTypedDeps = new ArrayList<>();

    // first, loop over the prepositions for NP annotation
    for (String[] mwp : THREEWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp0 = null;
      IndexedWord mwp1 = null;
      IndexedWord mwp2 = null;

      TypedDependency dep1 = null;
      TypedDependency dep2 = null;

      // first find the first part of the 3-word preposition: dep(mwp[0], mwp[1])
      // the two words should be next to each other in the sentence (difference of indexes = 1)
      for (TypedDependency td : list) {
        if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1])
            && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp0 = td.gov();
          mwp1 = td.dep();
          dep1 = td;
        }
      }

      // find the second part of the 3-word preposition: dep(mwp[1], mwp[2])
      // the two words should be next to each other in the sentence (difference of indexes = 1)
      for (TypedDependency td : list) {
        if (td.gov().equals(mwp1) && td.dep().value().equalsIgnoreCase(mwp[2])
            && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp2 = td.dep();
          dep2 = td;
        }
      }

      if (dep1 != null && dep2 != null) {

        // now search for prep(gov, mwp0)
        IndexedWord governor = null;
        TypedDependency prep = null;
        for (TypedDependency td1 : list) {
          if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().equals(mwp0)) {
            // we found prep(gov, mwp0)
            prep = td1;
            governor = prep.gov();
          }
        }

        // search for the complement: pobj|pcomp(mwp2, X)
        TypedDependency pobj = null;
        TypedDependency newtd = null;
        for (TypedDependency td2 : list) {
          if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(mwp2)) {
            pobj = td2;
            // create the new grammatical relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
          if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(mwp2)) {
            pobj = td2;
            // create the new grammatical relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
        }

        // only if we found the governor and complement parts, set them to KILL,
        // remove them, and add the new one
        if (prep != null && pobj != null && newtd != null) {
          prep.setReln(KILL);
          dep1.setReln(KILL);
          dep2.setReln(KILL);
          pobj.setReln(KILL);
          newTypedDeps.add(newtd);

          // now remove typed dependencies with reln "kill" and promote possible orphans
          for (TypedDependency td1 : list) {
            if (td1.reln() != KILL) {
              if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
                td1.setGov(governor);
              }
              if (!newTypedDeps.contains(td1)) {
                newTypedDeps.add(td1);
              }
            }
          }
          list.clear();
          list.addAll(newTypedDeps);
        }
      }
    }

    // second, loop again looking at flat annotation
    for (String[] mwp : THREEWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp0 = null;
      IndexedWord mwp1 = null;
      IndexedWord mwp2 = null;

      TypedDependency dep1 = null;
      TypedDependency dep2 = null;

      // first find the first part of the 3-word preposition: dep(mwp[0], mwp[1])
      // the two words should be next to each other in the sentence (difference of indexes = 1)
      for (TypedDependency td : list) {
        if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1])
            && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp0 = td.gov();
          mwp1 = td.dep();
          dep1 = td;
        }
      }

      // find the second part of the 3-word preposition: dep(mwp[0], mwp[2])
      // the two words should be one word apart in the sentence (difference of indexes = 2)
      for (TypedDependency td : list) {
        if (td.gov().equals(mwp0) && td.dep().value().equalsIgnoreCase(mwp[2])
            && Math.abs(td.gov().index() - td.dep().index()) == 2) {
          mwp2 = td.dep();
          dep2 = td;
        }
      }

      if (dep1 != null && dep2 != null) {

        // now search for prep(gov, mwp0)
        IndexedWord governor = null;
        TypedDependency prep = null;
        for (TypedDependency td1 : list) {
          if (td1.dep().equals(mwp0) && td1.reln() == PREPOSITIONAL_MODIFIER) {
            // we found prep(gov, mwp0)
            prep = td1;
            governor = prep.gov();
          }
        }

        // search for the complement: pobj|pcomp(mwp0, X)
        TypedDependency pobj = null;
        TypedDependency newtd = null;
        for (TypedDependency td2 : list) {
          if (td2.gov().equals(mwp0) && td2.reln() == PREPOSITIONAL_OBJECT) {
            pobj = td2;
            // create the new grammatical relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
          if (td2.gov().equals(mwp0) && td2.reln() == PREPOSITIONAL_COMPLEMENT) {
            pobj = td2;
            // create the new grammatical relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
        }

        // only if we found the governor and complement parts, set them to KILL,
        // remove them, and add the new one
        if (prep != null && pobj != null && newtd != null) {
          prep.setReln(KILL);
          dep1.setReln(KILL);
          dep2.setReln(KILL);
          pobj.setReln(KILL);
          newTypedDeps.add(newtd);

          // now remove typed dependencies with reln "kill" and promote possible orphans
          for (TypedDependency td1 : list) {
            if (td1.reln() != KILL) {
              if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
                td1.setGov(governor);
              }
              if (!newTypedDeps.contains(td1)) {
                newTypedDeps.add(td1);
              }
            }
          }
          list.clear();
          list.addAll(newTypedDeps);
        }
      }
    }
  }

  /*
   * While upgrading, here are some lists of common multiword prepositions which
   * we might try to cover better. (Also do corpus counts for same?)
   *
   * (Prague Dependency Treebank): as per CRIT, except for RESTR, but for RESTR,
   * apart from RESTR, away from RESTR, aside from RESTR, as from TSIN, ahead of TWHEN,
   * back of LOC/DIR3, exclusive of* RESTR, instead of SUBS, outside of LOC/DIR3,
   * off of DIR1, upwards of LOC/DIR3, as of TSIN, because of CAUS, inside of LOC/DIR3,
   * irrespective of REG, out of LOC/DIR1, regardless of REG, according to CRIT,
   * due to CAUS, next to LOC/RESTR, owing to* CAUS, preparatory to* TWHEN,
   * prior to* TWHEN, subsequent to* TWHEN, as to/for REG, contrary to* CPR,
   * close to* LOC/EXT (except the case named in the next table), near to LOC/DIR3,
   * nearer to LOC/DIR3, preliminary to* TWHEN, previous to* TWHEN, pursuant to* CRIT,
   * thanks to CAUS, along with ACMP, together with ACMP, devoid of* ACMP, void of* ACMP
   *
   * http://www.keepandshare.com/doc/view.php?u=13166
   *
   * according to, ahead of, as far as, as well as, by means of, due to, far from,
   * in addition to, in case of, in front of, in place of, in spite of, inside of,
   * instead of, in to (into), near to, next to, on account of, on behalf of,
   * on top of, on to (onto), out of, outside of, owing to, prior to, with regards to
   *
   * www.eslmonkeys.com/book/learner/prepositions.pdf: According to, Ahead of,
   * Along with, Apart from, As for, As to, Aside from, Because of, But for,
   * Contrary to, Except for, Instead of, Next to, Out of, Prior to, Thanks to
   */
  /**
   * Collapse multi-word prepositions of the following format, which comes from
   * flat annotation. This handles, e.g., "because of" (PP (IN because) (IN of) ...)
   * and "such as" (PP (JJ such) (IN as) ...):
   *
   * prep(gov, mwp[1]), dep(mwp[1], mwp[0]), pobj(mwp[1], compl)
   * -> prep_mwp[0]_mwp[1](gov, compl)
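   *
   * For example (illustrative only; the exact basic dependencies depend on the parse and on
   * MULTIWORD_PREPS): for "He resigned because of the scandal", a flat analysis such as
   * prep(resigned, of), dep(of, because), pobj(of, scandal)
   * would be collapsed to prep_because_of(resigned, scandal).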
   *
   * @param list List of typedDependencies to work on
   */
  private static void collapseFlatMWP(Collection<TypedDependency> list) {
    Collection<TypedDependency> newTypedDeps = new ArrayList<>();

    for (String[] mwp : MULTIWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp1 = null;
      IndexedWord governor = null;

      TypedDependency prep = null;
      TypedDependency dep = null;
      TypedDependency pobj = null;

      // first find the multiword preposition: dep(mwp[1], mwp[0])
      for (TypedDependency td : list) {
        if (Math.abs(td.gov().index() - td.dep().index()) == 1 && td.gov().value().equalsIgnoreCase(mwp[1])
            && td.dep().value().equalsIgnoreCase(mwp[0])) {
          mwp1 = td.gov();
          dep = td;
        }
      }

      if (mwp1 == null) {
        continue;
      }

      // now search for prep(gov, mwp1)
      for (TypedDependency td1 : list) {
        if (td1.dep().equals(mwp1) && td1.reln() == PREPOSITIONAL_MODIFIER) {
          // we found prep(gov, mwp1)
          prep = td1;
          governor = prep.gov();
        }
      }

      if (prep == null) {
        continue;
      }

      // search for the complement: pobj|pcomp(mwp1, X)
      for (TypedDependency td2 : list) {
        if (td2.gov().equals(mwp1) && td2.reln() == PREPOSITIONAL_OBJECT) {
          pobj = td2;
          // create the new grammatical relation
          GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1]);
          newTypedDeps.add(new TypedDependency(gr, governor, pobj.dep()));
        }
        if (td2.gov().equals(mwp1) && td2.reln() == PREPOSITIONAL_COMPLEMENT) {
          pobj = td2;
          // create the new grammatical relation
          GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1]);
          newTypedDeps.add(new TypedDependency(gr, governor, pobj.dep()));
        }
      }

      if (pobj == null) {
        return;
      }

      // only if we found the three parts, set them to KILL and remove them;
      // we know prep != null && dep != null && pobj != null
      prep.setReln(KILL);
      dep.setReln(KILL);
      pobj.setReln(KILL);

      // now remove typed dependencies with reln "kill" and promote possible orphans
      for (TypedDependency td1 : list) {
        if (td1.reln() != KILL) {
          if (td1.gov().equals(mwp1)) {
            td1.setGov(governor);
          }
          if (!newTypedDeps.contains(td1)) {
            newTypedDeps.add(td1);
          }
        }
      }
      list.clear();
      list.addAll(newTypedDeps);
    }
  }

  /**
   * This method gets rid of multiwords in conjunctions to avoid having them
   * create disconnected constituents. E.g., "bread-1 as-2 well-3 as-4 cheese-5"
   * will be turned into conj_and(bread, cheese), and then dep(well-3, as-2) and
   * dep(well-3, as-4) cannot be attached to the graph, so these dependencies
   * are erased.
   *
   * @param list List of typed dependencies to get rid of multiword conjunctions from
   */
  private static void eraseMultiConj(Collection<TypedDependency> list) {
    // find typed deps of form cc(gov, x)
    for (TypedDependency td1 : list) {
      if (td1.reln() == COORDINATION) {
        IndexedWord x = td1.dep();
        // find typed deps of form dep(x, y) and kill them
        for (TypedDependency td2 : list) {
          if (td2.gov().equals(x) && (td2.reln() == DEPENDENT || td2.reln() == MULTI_WORD_EXPRESSION
              || td2.reln() == COORDINATION || td2.reln() == ADVERBIAL_MODIFIER || td2.reln() == NEGATION_MODIFIER
              || td2.reln() == AUX_MODIFIER)) {
            td2.setReln(KILL);
          }
        }
      }
    }
    filterKill(list);
  }
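  // Illustrative example (not from the original source; the exact basic dependencies depend
  // on the parse): for "bread as well as cheese", if the coordination is analyzed with
  // cc(bread-1, well-3), then dep(well-3, as-2) and dep(well-3, as-4) are marked KILL and
  // filtered out, leaving only the coordination structure (e.g., conj_and(bread, cheese)).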
  /**
   * Remove duplicate relations: this can happen when collapsing stranded
   * prepositions. E.g., for "What does CPR stand for?" we get dep(stand, what),
   * and after collapsing we also get prep_for(stand, what).
   *
   * @param list A list of typed dependencies to check through
   */
  private static void removeDep(Collection<TypedDependency> list) {
    Set<GrammaticalRelation> prepRels = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
    prepRels.addAll(EnglishGrammaticalRelations.getPrepsC());
    for (TypedDependency td1 : list) {
      if (prepRels.contains(td1.reln())) { // if we have a prep_ relation
        IndexedWord gov = td1.gov();
        IndexedWord dep = td1.dep();

        for (TypedDependency td2 : list) {
          if (td2.reln() == DEPENDENT && td2.gov().equals(gov) && td2.dep().equals(dep)) {
            td2.setReln(KILL);
          }
        }
      }
    }

    // now remove typed dependencies with reln "kill"
    for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext(); ) {
      TypedDependency td = iter.next();
      if (td.reln() == KILL) {
        if (DEBUG) {
          log.info("Removing duplicate relation: " + td);
        }
        iter.remove();
      }
    }
  }

  /**
   * Find and remove any exact duplicates from a dependency list.
   * For example, the method that "corrects" nsubj dependencies can
   * turn them into nsubjpass dependencies. If there is some other
   * source of nsubjpass dependencies, there may now be multiple
   * copies of the nsubjpass dependency. If the containing data type
   * is a List, they may both now be in the List.
   */
  private static void removeExactDuplicates(Collection<TypedDependency> list) {
    Set<TypedDependency> set = new TreeSet<>(list);
    list.clear();
    list.addAll(set);
  }


  public static List<GrammaticalStructure> readCoNLLXGrammaticalStructureCollection(String fileName) throws IOException {
    return readCoNLLXGrammaticalStructureCollection(fileName, EnglishGrammaticalRelations.shortNameToGRel,
        new FromDependenciesFactory());
  }

  public static EnglishGrammaticalStructure buildCoNLLXGrammaticalStructure(List<List<String>> tokenFields) {
    return (EnglishGrammaticalStructure) buildCoNLLXGrammaticalStructure(tokenFields,
        EnglishGrammaticalRelations.shortNameToGRel, new FromDependenciesFactory());
  }

  public static class FromDependenciesFactory
      implements GrammaticalStructureFromDependenciesFactory<EnglishGrammaticalStructure> {
    @Override
    public EnglishGrammaticalStructure build(List<TypedDependency> tdeps, TreeGraphNode root) {
      return new EnglishGrammaticalStructure(tdeps, root);
    }
  }

  public static void main(String[] args) {
    GrammaticalStructureConversionUtils.convertTrees(args, "en-sd");
  }

} // end class EnglishGrammaticalStructure
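// ---------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original Stanford CoreNLP source.
// It assumes the standard GrammaticalStructure accessors typedDependencies() and
// typedDependenciesCollapsed(); the hand-written parse tree, the sketch class
// name, and the expected output are indicative only.
// ---------------------------------------------------------------------------
class EnglishGrammaticalStructureUsageSketch {

  public static void main(String[] args) {
    // A hand-written phrase-structure parse for "He ran out of the house."
    Tree tree = Tree.valueOf(
        "(ROOT (S (NP (PRP He)) (VP (VBD ran) (PRT (RP out)) (PP (IN of) (NP (DT the) (NN house)))) (. .)))");

    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);

    // Basic Stanford Dependencies, e.g. prt(ran, out), prep(ran, of), pobj(of, house), ...
    System.out.println("Basic:     " + gs.typedDependencies());

    // Collapsed dependencies; the multiword-preposition rules above would be expected to
    // merge "out of" into a single relation such as prep_out_of(ran, house).
    System.out.println("Collapsed: " + gs.typedDependenciesCollapsed());
  }

}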