package edu.stanford.nlp.trees;
import java.io.*;
import java.util.*;
import java.util.function.Predicate;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.process.Morphology;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.util.*;
import static edu.stanford.nlp.trees.EnglishGrammaticalRelations.*;
import static edu.stanford.nlp.trees.GrammaticalRelation.*;
/**
* A GrammaticalStructure for English. This is the class that produces Stanford Dependencies.
*
* For feeding Stanford parser trees into this class, the Stanford parser should be run with the
* "-retainNPTmpSubcategories" option for best results!
*
* @author Bill MacCartney
* @author Marie-Catherine de Marneffe
* @author Christopher Manning
* @author Daniel Cer (CoNLLX format and alternative user selected dependency
* printer/reader interface)
* @author John Bauer
*/
public class EnglishGrammaticalStructure extends GrammaticalStructure {
private static final long serialVersionUID = -1866362375001969402L;
private static final boolean DEBUG = System.getProperty("EnglishGrammaticalStructure", null) != null;
/**
* Construct a new {@code EnglishGrammaticalStructure} from an existing parse
* tree. The new {@code GrammaticalStructure} has the same tree structure
* and label values as the given tree (but no shared storage). As part of
* construction, the parse tree is analyzed using definitions from
* {@link GrammaticalRelation GrammaticalRelation} to populate
* the new {@code GrammaticalStructure} with as many labeled grammatical
* relations as it can.
*
* @param t Parse tree to make grammatical structure from
*/
public EnglishGrammaticalStructure(Tree t) {
this(t, new PennTreebankLanguagePack().punctuationWordRejectFilter());
}
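// Illustrative usage sketch (not part of the original class): one common way to get
// from a parse tree to collapsed typed dependencies. The model path, parse(String),
// and the typedDependenciesCCprocessed() accessor inherited from GrammaticalStructure
// are assumptions here, shown only for orientation:
//
//   LexicalizedParser lp = LexicalizedParser.loadModel(
//       "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
//   Tree tree = lp.parse("The man that I love sleeps soundly.");
//   GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
//   for (TypedDependency td : gs.typedDependenciesCCprocessed()) {
//     System.err.println(td);
//   }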
/**
* This gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
*
* @param t Parse tree to make grammatical structure from
* @param puncFilter Filter to remove punctuation dependencies
*/
public EnglishGrammaticalStructure(Tree t, Predicate<String> puncFilter) {
this(t, puncFilter, new SemanticHeadFinder(true), true);
}
/**
* This gets used by GrammaticalStructureFactory (by reflection). DON'T DELETE.
*
* @param t Parse tree to make grammatical structure from
* @param puncFilter Filter to remove punctuation dependencies
* @param hf HeadFinder to use when building it
*/
public EnglishGrammaticalStructure(Tree t, Predicate<String> puncFilter, HeadFinder hf) {
this(t, puncFilter, hf, true);
}
/**
* Construct a new {@code GrammaticalStructure} from an existing parse
* tree. The new {@code GrammaticalStructure} has the same tree structure
* and label values as the given tree (but no shared storage). As part of
* construction, the parse tree is analyzed using definitions from
* {@link GrammaticalRelation GrammaticalRelation} to populate
* the new {@code GrammaticalStructure} with as many labeled grammatical
* relations as it can.
*
* @param t Parse tree to make grammatical structure from
* @param puncFilter Filter for punctuation words
* @param hf HeadFinder to use when building it
* @param threadSafe Whether or not to support simultaneous instances among multiple
* threads
*/
public EnglishGrammaticalStructure(Tree t, Predicate<String> puncFilter, HeadFinder hf, boolean threadSafe) {
// the tree is normalized (for index and functional tag stripping) inside CoordinationTransformer
super(t, EnglishGrammaticalRelations.values(threadSafe), threadSafe ? EnglishGrammaticalRelations.valuesLock() : null, new CoordinationTransformer(hf), hf, puncFilter, Filters.acceptFilter());
}
/** Used for postprocessing CoNLL X dependencies */
public EnglishGrammaticalStructure(List<TypedDependency> projectiveDependencies, TreeGraphNode root) {
super(projectiveDependencies, root);
}
/**
* Returns a Filter which checks dependencies for usefulness as
* extra tree-based dependencies. By default, everything is
* accepted. One example of how this can be useful is in the
* English dependencies, where the REL dependency is used as an
* intermediate and we do not want this to be added when we make a
* second pass over the trees for missing dependencies.
*/
@Override
protected Predicate<TypedDependency> extraTreeDepFilter() {
return extraTreeDepFilter;
}
private static class ExtraTreeDepFilter implements Predicate<TypedDependency>, Serializable {
@Override
public boolean test(TypedDependency d) {
return d != null && d.reln() != RELATIVE;
}
private static final long serialVersionUID = 1L;
}
private static final Predicate<TypedDependency> extraTreeDepFilter = new ExtraTreeDepFilter();
@Override
protected void correctDependencies(List<TypedDependency> list) {
if (DEBUG) {
printListSorted("At correctDependencies:", list);
}
correctSubjPass(list);
if (DEBUG) {
printListSorted("After correctSubjPass:", list);
}
removeExactDuplicates(list);
if (DEBUG) {
printListSorted("After removeExactDuplicates:", list);
}
}
private static void printListSorted(String title, Collection<TypedDependency> list) {
List<TypedDependency> lis = new ArrayList<>(list);
Collections.sort(lis);
if (title != null) {
System.err.println(title);
}
System.err.println(lis);
}
@Override
protected void postProcessDependencies(List<TypedDependency> list) {
if (DEBUG) {
printListSorted("At postProcessDependencies:", list);
}
SemanticGraph sg = new SemanticGraph(list);
correctWHAttachment(sg);
list.clear();
list.addAll(sg.typedDependencies());
if (DEBUG) {
printListSorted("After correcting WH movement", list);
}
convertRel(list);
if (DEBUG) {
printListSorted("After converting rel:", list);
}
}
@Override
protected void getExtras(List<TypedDependency> list) {
addRef(list);
if (DEBUG) {
printListSorted("After adding ref:", list);
}
addExtraNSubj(list);
if (DEBUG) {
printListSorted("After adding extra nsubj:", list);
}
addStrandedPobj(list);
if (DEBUG) {
printListSorted("After adding stranded pobj:", list);
}
}
// Using this makes addStrandedPobj a lot cleaner looking, but it
// makes the converter roughly 2% slower. Might not be worth it.
// Similar changes could be made to many of the other complicated
// collapsing methods.
// static final SemgrexPattern strandedPobjSemgrex = SemgrexPattern.compile("{}=head >rcmod ({} [ == {}=prepgov | >xcomp {}=prepgov | >conj {}=prepgov ]) : {}=prepgov >prep ({}=prepdep !>pcomp {} !> pobj {})");
// // Deal with preposition stranding in relative clauses.
// // For example, "the only thing I'm rooting for"
// // This method will add pobj(for, thing) by connecting using the rcmod and prep
// private static void addStrandedPobj(List list) {
// SemanticGraph graph = new SemanticGraph(list);
// SemgrexMatcher matcher = strandedPobjSemgrex.matcher(graph);
// while (matcher.find()) {
// IndexedWord gov = matcher.getNode("prepdep");
// IndexedWord dep = matcher.getNode("head");
// TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, gov, dep);
// newDep.setExtra();
// list.add(newDep);
// }
// }
// Deal with preposition stranding in relative clauses.
// For example, "the only thing I'm rooting for"
// This method will add pobj(for, thing) by connecting using the rcmod and prep
private static void addStrandedPobj(List<TypedDependency> list) {
List<IndexedWord> depNodes = null;
List<TypedDependency> newDeps = null;
for (TypedDependency rcmod : list) {
if (rcmod.reln() != RELATIVE_CLAUSE_MODIFIER) {
continue;
}
IndexedWord head = rcmod.gov();
if (depNodes == null) {
depNodes = Generics.newArrayList();
} else {
depNodes.clear();
}
depNodes.add(rcmod.dep());
for (TypedDependency connected : list) {
if (connected.gov().equals(rcmod.dep()) && (connected.reln() == XCLAUSAL_COMPLEMENT || connected.reln() == CONJUNCT)) {
depNodes.add(connected.dep());
}
}
for (IndexedWord dep : depNodes) {
for (TypedDependency prep : list) {
if (!prep.gov().equals(dep) || prep.reln() != PREPOSITIONAL_MODIFIER) {
continue;
}
boolean found = false;
for (TypedDependency other : list) {
if (other.gov().equals(prep.dep()) && (other.reln() == PREPOSITIONAL_COMPLEMENT || other.reln() == PREPOSITIONAL_OBJECT)) {
found = true;
break;
}
}
if (!found) {
if (newDeps == null) {
newDeps = Generics.newArrayList();
}
TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, prep.dep(), head);
newDeps.add(newDep);
}
}
}
}
if (newDeps != null) {
list.addAll(newDeps);
}
}
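// Worked example (assumed basic analysis) for "the only thing I'm rooting for":
// given rcmod(thing, rooting) and prep(rooting, for) with no pobj/pcomp under "for",
// the method above adds the extra dependency pobj(for, thing).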
/* Used by correctWHAttachment */
private static SemgrexPattern XCOMP_PATTERN = SemgrexPattern.compile("{}=root >xcomp {}=embedded >/^(dep|dobj)$/ {}=wh ?>/([di]obj)/ {}=obj");
private static Morphology morphology = new Morphology();
/**
* Tries to correct complicated cases of WH-movement in
* sentences such as "What does Mary seem to have?" in
* which "What" should attach to "have" instead of the
* control verb.
*
* @param sg The Semantic graph to operate on.
*/
private static void correctWHAttachment(SemanticGraph sg) {
/* Semgrexes require a graph with a root. */
if (sg.getRoots().isEmpty())
return;
SemanticGraph sgCopy = sg.makeSoftCopy();
SemgrexMatcher matcher = XCOMP_PATTERN.matcher(sgCopy);
while (matcher.findNextMatchingNode()) {
IndexedWord root = matcher.getNode("root");
IndexedWord embeddedVerb = matcher.getNode("embedded");
IndexedWord wh = matcher.getNode("wh");
IndexedWord dobj = matcher.getNode("obj");
/* Check if the object is a WH-word. */
if (wh.tag().startsWith("W")) {
boolean reattach = false;
/* If the control verb already has an object, then
we have to reattach the WH-word to the verb in the embedded clause. */
if (dobj != null) {
reattach = true;
} else {
/* If the control verb can't have an object, we also have to reattach. */
String lemma = morphology.lemma(root.value(), root.tag());
if (lemma.matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX)) {
reattach = true;
}
}
if (reattach) {
SemanticGraphEdge edge = sg.getEdge(root, wh);
if (edge != null) {
sg.removeEdge(edge);
sg.addEdge(embeddedVerb, wh, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
}
}
}
}
}
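// Worked example (assumed basic analysis) for "What does Mary seem to have?":
// the parser attaches "What" to the control verb, e.g. dep(seem, What), with
// xcomp(seem, have); since "seem" is a control verb that cannot take this object,
// the edge is removed and dobj(have, What) is added instead.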
/**
* What we do in this method is look for temporary dependencies of
* the type "rel". These occur in sentences such as "I saw the man
* who you love". In that case, we should produce dobj(love, who).
* On the other hand, in the sentence "... which Mr. Bush was
* fighting for", we should have pobj(for, which).
*/
private static void convertRel(List<TypedDependency> list) {
List<TypedDependency> newDeps = new ArrayList<>();
for (TypedDependency rel : list) {
if (rel.reln() != RELATIVE) {
continue;
}
boolean foundPrep = false;
for (TypedDependency prep : list) {
// todo: It would also be good to add a rule here to prefer ccomp nsubj over dobj if there is a ccomp with no subj
// then we could get right: Which eco-friendly options do you think there will be on the new Lexus?
if (prep.reln() != PREPOSITIONAL_MODIFIER) {
continue;
}
if (!prep.gov().equals(rel.gov())) {
continue;
}
// at this point, we have two dependencies as in the Mr. Bush
// example. it should be rel(fighting, which) and
// prep(fighting, for). We now look to see if there is a
// corresponding pobj associated with the dependent of the
// prep relation. If not, we will connect the dep of the prep
// relation and the head of the rel relation. Otherwise, the
// original rel relation will become a dobj.
boolean foundPobj = false;
for (TypedDependency pobj : list) {
if (pobj.reln() != PREPOSITIONAL_OBJECT && pobj.reln() != PREPOSITIONAL_COMPLEMENT) {
continue;
}
if (!pobj.gov().equals(prep.dep())) {
continue;
}
// we did find a pobj/pcomp, so it is not necessary to
// change this rel.
foundPobj = true;
break;
}
if (!foundPobj) {
foundPrep = true;
TypedDependency newDep = new TypedDependency(PREPOSITIONAL_OBJECT, prep.dep(), rel.dep());
newDeps.add(newDep);
rel.setReln(KILL);
// break; // only put it in one place (or do we want to allow across-the-board effects?
}
}
if (!foundPrep) {
rel.setReln(DIRECT_OBJECT);
}
}
filterKill(list);
for (TypedDependency dep : newDeps) {
if (!list.contains(dep)) {
list.add(dep);
}
}
}
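// Worked examples (assumed basic analyses) of the rewrite above:
//   "I saw the man who you love":          rel(love, who) -> dobj(love, who)
//   "... which Mr. Bush was fighting for": rel(fighting, which) + prep(fighting, for)
//                                          -> pobj(for, which), with the rel killed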
/**
* Alters a list in place by removing all the KILL relations
*/
private static void filterKill(Collection<TypedDependency> deps) {
List<TypedDependency> filtered = Generics.newArrayList();
for (TypedDependency dep : deps) {
if (dep.reln() != KILL) {
filtered.add(dep);
}
}
deps.clear();
deps.addAll(filtered);
}
/**
* Destructively modifies this {@code Collection}
* by collapsing several types of transitive pairs of dependencies.
* If called with a tree of dependencies and both CCprocess and
* includeExtras set to false, then the tree structure is preserved.
*
* - prepositional object dependencies: pobj
*   prep(cat, in) and pobj(in, hat) are collapsed to prep_in(cat, hat)
* - prepositional complement dependencies: pcomp
*   prep(heard, of) and pcomp(of, attacking) are collapsed to prepc_of(heard, attacking)
* - conjunct dependencies
*   cc(investors, and) and conj(investors, regulators) are collapsed to conj_and(investors, regulators)
* - possessive dependencies: possessive
*   possessive(Montezuma, 's) will be erased. This is like a collapsing, but
*   due to the flatness of NPs, two dependencies are not actually composed.
* - For relative clauses, it will collapse referent
*   ref(man, that) and dobj(love, that) are collapsed to dobj(love, man)
*
*/
@Override
protected void collapseDependencies(List<TypedDependency> list, boolean CCprocess, Extras includeExtras) {
if (DEBUG) {
printListSorted("collapseDependencies: CCproc: " + CCprocess + " includeExtras: " + includeExtras, list);
}
correctDependencies(list);
if (DEBUG) {
printListSorted("After correctDependencies:", list);
}
eraseMultiConj(list);
if (DEBUG) {
printListSorted("After collapse multi conj:", list);
}
collapse2WP(list);
if (DEBUG) {
printListSorted("After collapse2WP:", list);
}
collapseFlatMWP(list);
if (DEBUG) {
printListSorted("After collapseFlatMWP:", list);
}
collapse2WPbis(list);
if (DEBUG) {
printListSorted("After collapse2WPbis:", list);
}
collapse3WP(list);
if (DEBUG) {
printListSorted("After collapse3WP:", list);
}
collapsePrepAndPoss(list);
if (DEBUG) {
printListSorted("After PrepAndPoss:", list);
}
collapseConj(list);
if (DEBUG) {
printListSorted("After conj:", list);
}
if (includeExtras.doRef) {
addRef(list);
if (DEBUG) {
printListSorted("After adding ref:", list);
}
if (includeExtras.collapseRef) {
collapseReferent(list);
if (DEBUG) {
printListSorted("After collapse referent:", list);
}
}
}
if (CCprocess) {
treatCC(list);
if (DEBUG) {
printListSorted("After treatCC:", list);
}
}
if (includeExtras.doSubj) {
addExtraNSubj(list);
if (DEBUG) {
printListSorted("After adding extra nsubj:", list);
}
correctSubjPass(list);
if (DEBUG) {
printListSorted("After correctSubjPass:", list);
}
}
removeDep(list);
if (DEBUG) {
printListSorted("After remove dep:", list);
}
Collections.sort(list);
if (DEBUG) {
printListSorted("After all collapse:", list);
}
}
@Override
protected void collapseDependenciesTree(List<TypedDependency> list) {
collapseDependencies(list, false, Extras.NONE);
}
/**
* Does some hard coding to deal with relations in CONJP. For now we deal with:
* "but not", "if not", "instead of", "rather than", "but rather" GO TO negcc;
* "as well as", "not to mention", "but also", "&" GO TO and.
*
* @param conj The head dependency of the conjunction marker
* @return A GrammaticalRelation made from a normalized form of that
* conjunction.
*/
private static GrammaticalRelation conjValue(String conj) {
String newConj = conj.toLowerCase();
if (newConj.equals("not") || newConj.equals("instead") || newConj.equals("rather")) {
newConj = "negcc";
} else if (newConj.equals("mention") || newConj.equals("to") || newConj.equals("also") || newConj.contains("well") || newConj.equals("&")) {
newConj = "and";
}
return EnglishGrammaticalRelations.getConj(newConj);
}
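// For example, under the normalization above, conjValue("not"), conjValue("instead")
// and conjValue("rather") yield the conjunction value "negcc" (typically printed as
// conj_negcc), while conjValue("&") or conjValue("also") yield "and" (conj_and);
// any other conjunction word w yields conj_w via EnglishGrammaticalRelations.getConj(w).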
private static void treatCC(Collection<TypedDependency> list) {
// Construct a map from tree nodes to the set of typed
// dependencies in which the node appears as dependent.
Map<IndexedWord, Set<TypedDependency>> map = Generics.newHashMap();
// Construct a map of tree nodes being governor of a subject grammatical
// relation to that relation
Map<IndexedWord, TypedDependency> subjectMap = Generics.newHashMap();
// Construct a set of TreeGraphNodes with a passive auxiliary on them
Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
// Construct a map of tree nodes being governor of an object grammatical
// relation to that relation
// Map objectMap = new
// HashMap();
List<IndexedWord> rcmodHeads = Generics.newArrayList();
List<IndexedWord> prepcDep = Generics.newArrayList();
for (TypedDependency typedDep : list) {
if (!map.containsKey(typedDep.dep())) {
// NB: Here and in other places below, we use a TreeSet (which extends
// SortedSet) to guarantee that results are deterministic
map.put(typedDep.dep(), new TreeSet<>());
}
map.get(typedDep.dep()).add(typedDep);
if (typedDep.reln().equals(AUX_PASSIVE_MODIFIER)) {
withPassiveAuxiliary.add(typedDep.gov());
}
// look for subjects
if (typedDep.reln().getParent() == NOMINAL_SUBJECT || typedDep.reln().getParent() == SUBJECT || typedDep.reln().getParent() == CLAUSAL_SUBJECT) {
if (!subjectMap.containsKey(typedDep.gov())) {
subjectMap.put(typedDep.gov(), typedDep);
}
}
// look for objects
// this map was only required by the code commented out below, so comment
// it out too
// if (typedDep.reln() == DIRECT_OBJECT) {
// if (!objectMap.containsKey(typedDep.gov())) {
// objectMap.put(typedDep.gov(), typedDep);
// }
// }
// look for rcmod relations
if (typedDep.reln() == RELATIVE_CLAUSE_MODIFIER) {
rcmodHeads.add(typedDep.gov());
}
// look for prepc relations: put the dependent of such a relation in the
// list
// to avoid wrong propagation of dobj
if (typedDep.reln().toString().startsWith("prepc")) {
prepcDep.add(typedDep.dep());
}
}
// System.err.println(map);
// if (DEBUG) System.err.println("Subject map: " + subjectMap);
// if (DEBUG) System.err.println("Object map: " + objectMap);
// System.err.println(rcmodHeads);
// create a new list of typed dependencies
Collection<TypedDependency> newTypedDeps = new ArrayList<>(list);
// find typed deps of form conj(gov,dep)
for (TypedDependency td : list) {
if (EnglishGrammaticalRelations.getConjs().contains(td.reln())) {
IndexedWord gov = td.gov();
IndexedWord dep = td.dep();
// look at the dep in the conjunct
Set<TypedDependency> gov_relations = map.get(gov);
// System.err.println("gov " + gov);
if (gov_relations != null) {
for (TypedDependency td1 : gov_relations) {
// System.err.println("gov rel " + td1);
IndexedWord newGov = td1.gov();
// in the case of errors in the basic dependencies, it
// is possible to have overlapping newGov & dep
if (newGov.equals(dep)) {
continue;
}
GrammaticalRelation newRel = td1.reln();
if (newRel != ROOT) {
if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
// to prevent wrong propagation in the case of long dependencies in relative clauses
if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
if (DEBUG) {
System.err.println("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
} else {
if (DEBUG) {
System.err.println("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
}
newTypedDeps.add(new TypedDependency(newRel, newGov, dep));
}
}
}
}
// propagate subjects
// look at the gov in the conjunct: if it has a subject relation,
// the dep is a verb and the dep doesn't have a subject relation
// then we want to add a subject relation for the dep.
// (By testing for the dep to be a verb, we are going to miss subject of
// copula verbs! but
// is it safe to relax this assumption?? i.e., just test for the subject
// part)
// CDM 2008: I also added in JJ, since participial verbs are often
// tagged JJ
String tag = dep.tag();
if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && ! subjectMap.containsKey(dep)) {
TypedDependency tdsubj = subjectMap.get(gov);
// check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
// add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
GrammaticalRelation relation = tdsubj.reln();
if (relation == NOMINAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = NOMINAL_SUBJECT;
}
} else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
if (isDefinitelyActive(tag)) {
relation = CLAUSAL_SUBJECT;
}
} else if (relation == NOMINAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = NOMINAL_PASSIVE_SUBJECT;
}
} else if (relation == CLAUSAL_SUBJECT) {
if (withPassiveAuxiliary.contains(dep)) {
relation = CLAUSAL_PASSIVE_SUBJECT;
}
}
if (DEBUG) {
System.err.println("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.dep() + " (subj propagation case)");
}
newTypedDeps.add(new TypedDependency(relation, dep, tdsubj.dep()));
}
// propagate objects
// cdm july 2010: This bit of code would copy a dobj from the first
// clause to a later conjoined clause if it didn't
// contain its own dobj or prepc. But this is too aggressive and wrong
// if the later clause is intransitive
// (including passivized cases) and so I think we have to not have this
// done always, and see no good "sometimes" heuristic.
// IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
// (SBAR).
// if (objectMap.containsKey(gov) &&
// dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
// && ! prepcDep.contains(gov)) {
// TypedDependency tdobj = objectMap.get(gov);
// if (DEBUG) {
// System.err.println("Adding new " + tdobj.reln() + " dependency from "
// + dep + " to " + tdobj.dep() + " (obj propagation case)");
// }
// newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
// tdobj.dep()));
// }
}
}
list.clear();
list.addAll(newTypedDeps);
}
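// Worked example (assumed basic analysis) of the subject propagation above, for
// "She eats and sleeps": given nsubj(eats, She), cc(eats, and) and conj(eats, sleeps),
// treatCC adds nsubj(sleeps, She), since "sleeps" is a verb with no subject of its own.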
private static boolean isDefinitelyActive(String tag) {
// we should include VBD, but don't as it is often a tagging mistake.
return tag.equals("VB") || tag.equals("VBZ") || tag.equals("VBP") || tag.startsWith("JJ");
}
/**
* This rewrites the "conj" relation to "conj_word" and deletes cases of the
* "cc" relation providing this rewrite has occurred (but not if there is only
* something like a clause-initial and). For instance, cc(elected-5, and-9)
* conj(elected-5, re-elected-11) becomes conj_and(elected-5, re-elected-11)
*
* @param list List of dependencies.
*/
private static void collapseConj(Collection<TypedDependency> list) {
List<IndexedWord> govs = Generics.newArrayList();
// find typed deps of form cc(gov, dep)
for (TypedDependency td : list) {
if (td.reln() == COORDINATION) { // i.e. "cc"
IndexedWord gov = td.gov();
GrammaticalRelation conj = conjValue(td.dep().value());
if (DEBUG) {
System.err.println("Set conj to " + conj + " based on " + td);
}
// find other deps of that gov having reln "conj"
boolean foundOne = false;
for (TypedDependency td1 : list) {
if (td1.gov().equals(gov)) {
if (td1.reln() == CONJUNCT) { // i.e., "conj"
// change "conj" to the actual (lexical) conjunction
if (DEBUG) {
System.err.println("Changing " + td1 + " to have relation " + conj);
}
td1.setReln(conj);
foundOne = true;
} else if (td1.reln() == COORDINATION) {
conj = conjValue(td1.dep().value());
if (DEBUG) {
System.err.println("Set conj to " + conj + " based on " + td1);
}
}
}
}
// register to remove cc from this governor
if (foundOne) {
govs.add(gov);
}
}
}
// now remove typed dependencies with reln "cc" if we have successfully
// collapsed
for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext();) {
TypedDependency td2 = iter.next();
if (td2.reln() == COORDINATION && govs.contains(td2.gov())) {
iter.remove();
}
}
}
/**
* This method will collapse a referent relation such as the following:
* for "The man that I love …", ref(man, that) and dobj(love, that) collapse to
* dobj(love, man).
*/
private static void collapseReferent(Collection<TypedDependency> list) {
// find typed deps of form ref(gov, dep)
// put them in a List for processing; remove them from the set of deps
List<TypedDependency> refs = new ArrayList<>();
for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext();) {
TypedDependency td = iter.next();
if (td.reln() == REFERENT) {
refs.add(td);
iter.remove();
}
}
// now substitute target of referent where possible
for (TypedDependency ref : refs) {
IndexedWord dep = ref.dep();// take the relative word
IndexedWord ant = ref.gov();// take the antecedent
for (TypedDependency td : list) {
// the last condition below maybe shouldn't be necessary, but it has
// helped stop things going haywire a couple of times (it stops the
// creation of a unit cycle that probably leaves something else
// disconnected) [cdm Jan 2010]
if (td.dep().equals(dep) && td.reln() != REFERENT && !td.gov().equals(ant)) {
if (DEBUG) {
System.err.print("referent: changing " + td);
}
td.setDep(ant);
td.setExtra();
if (DEBUG) {
System.err.println(" to " + td);
}
}
}
}
}
/**
* Look for ref rules for a given word. We look through the
* children and grandchildren of the rcmod dependency, and if any
* children or grandchildren depend on a that/what/which/etc word,
* we take the leftmost that/what/which/etc word as the dependent
* for the ref TypedDependency.
*/
private static void addRef(Collection<TypedDependency> list) {
List<TypedDependency> newDeps = new ArrayList<>();
for (TypedDependency rcmod : list) {
if (rcmod.reln() != RELATIVE_CLAUSE_MODIFIER) {
// we only add ref dependencies across relative clauses
continue;
}
IndexedWord head = rcmod.gov();
IndexedWord modifier = rcmod.dep();
TypedDependency leftChild = null;
for (TypedDependency child : list) {
if (child.gov().equals(modifier) &&
EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(child.dep().value()).matches() &&
(leftChild == null || child.dep().index() < leftChild.dep().index())) {
leftChild = child;
}
}
// TODO: could be made more efficient
TypedDependency leftGrandchild = null;
for (TypedDependency child : list) {
if (!child.gov().equals(modifier)) {
continue;
}
for (TypedDependency grandchild : list) {
if (grandchild.gov().equals(child.dep()) &&
EnglishPatterns.RELATIVIZING_WORD_PATTERN.matcher(grandchild.dep().value()).matches() &&
(leftGrandchild == null || grandchild.dep().index() < leftGrandchild.dep().index())) {
leftGrandchild = grandchild;
}
}
}
TypedDependency newDep = null;
if (leftGrandchild != null && (leftChild == null || leftGrandchild.dep().index() < leftChild.dep().index())) {
newDep = new TypedDependency(REFERENT, head, leftGrandchild.dep());
} else if (leftChild != null) {
newDep = new TypedDependency(REFERENT, head, leftChild.dep());
}
if (newDep != null) {
newDeps.add(newDep);
}
}
for (TypedDependency newDep : newDeps) {
if (!list.contains(newDep)) {
newDep.setExtra();
list.add(newDep);
}
}
}
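// Worked example (assumed basic analysis) for "The man that I love ...":
// given rcmod(man, love) and dobj(love, that), the relativizer "that" is the leftmost
// matching child of the rcmod dependent, so ref(man, that) is added as an extra dependency.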
/**
* Add extra nsubj dependencies when collapsing basic dependencies.
*
* In the general case, we look for an aux modifier under an xcomp
* modifier, and assuming there aren't already associated nsubj
* dependencies as daughters of the original xcomp dependency, we
* add nsubj dependencies for each nsubj daughter of the aux.
*
* There is also a special case for "to" words, in which case we add
* a dependency if and only if there is no nsubj associated with the
* xcomp and there is no other aux dependency. This accounts for
* sentences such as "he decided not to" with no following verb.
*/
private static void addExtraNSubj(Collection<TypedDependency> list) {
List<TypedDependency> newDeps = new ArrayList<>();
for (TypedDependency xcomp : list) {
if (xcomp.reln() != XCLAUSAL_COMPLEMENT) {
// we only add extra nsubj dependencies to some xcomp dependencies
continue;
}
IndexedWord modifier = xcomp.dep();
IndexedWord head = xcomp.gov();
boolean hasSubjectDaughter = false;
boolean hasAux = false;
List<IndexedWord> subjects = Generics.newArrayList();
List<IndexedWord> objects = Generics.newArrayList();
for (TypedDependency dep : list) {
// already have a subject dependency
if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov().equals(modifier)) {
hasSubjectDaughter = true;
break;
}
if (dep.reln() == AUX_MODIFIER && dep.gov().equals(modifier)) {
hasAux = true;
}
if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov().equals(head)) {
subjects.add(dep.dep());
}
if (dep.reln() == DIRECT_OBJECT && dep.gov().equals(head)) {
objects.add(dep.dep());
}
}
// if we already have an nsubj dependency, no need to add an extra nsubj
if (hasSubjectDaughter) {
continue;
}
if ((modifier.value().equalsIgnoreCase("to") && hasAux) ||
(!modifier.value().equalsIgnoreCase("to") && !hasAux)) {
continue;
}
// In general, we find that the objects of the verb are better
// for extra nsubj than the original nsubj of the verb. For example,
// "Many investors wrote asking the SEC to require ..."
// There is no nsubj of asking, but the dobj, SEC, is the extra nsubj of require.
// Similarly, "The law tells them when to do so"
// Instead of nsubj(do, law) we want nsubj(do, them)
if (objects.size() > 0) {
for (IndexedWord object : objects) {
TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, object);
newDeps.add(newDep);
}
} else {
for (IndexedWord subject : subjects) {
TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, subject);
newDeps.add(newDep);
}
}
}
for (TypedDependency newDep : newDeps) {
if (!list.contains(newDep)) {
newDep.setExtra();
list.add(newDep);
}
}
}
/**
* This method corrects subjects of verbs for which we identified an auxpass,
* but didn't identify the subject as passive.
*
* @param list List of typedDependencies to work on
*/
private static void correctSubjPass(Collection<TypedDependency> list) {
// put in a list verbs having an auxpass
List<IndexedWord> list_auxpass = new ArrayList<>();
for (TypedDependency td : list) {
if (td.reln() == AUX_PASSIVE_MODIFIER) {
list_auxpass.add(td.gov());
}
}
for (TypedDependency td : list) {
// correct nsubj
if (td.reln() == NOMINAL_SUBJECT && list_auxpass.contains(td.gov())) {
// System.err.println("%%% Changing subj to passive: " + td);
td.setReln(NOMINAL_PASSIVE_SUBJECT);
}
if (td.reln() == CLAUSAL_SUBJECT && list_auxpass.contains(td.gov())) {
// System.err.println("%%% Changing subj to passive: " + td);
td.setReln(CLAUSAL_PASSIVE_SUBJECT);
}
// correct unretrieved poss: dep relation in which the dependent is a
// PRP$ or WP$
// cdm: Now done in basic rules. The only cases that this still matches
// are (1) tagging mistakes where PRP in dobj position is mistagged PRP$
// or a couple of parsing errors where the dependency is wrong anyway, so
// it's probably okay to keep it a dep. So I'm disabling this.
// String tag = td.dep().tag();
// if (td.reln() == DEPENDENT && (tag.equals("PRP$") || tag.equals("WP$"))) {
// System.err.println("%%% Unrecognized basic possessive pronoun: " + td);
// td.setReln(POSSESSION_MODIFIER);
// }
}
}
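// Worked example (assumed basic analysis) for "The bill was passed":
// given auxpass(passed, was) and nsubj(passed, bill), the method above rewrites the
// subject relation to nsubjpass(passed, bill).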
private static boolean inConjDeps(TypedDependency td, List<Triple<TypedDependency, TypedDependency, Boolean>> conjs) {
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
if (td.equals(trip.first())) {
return true;
}
}
return false;
}
private static void collapsePrepAndPoss(Collection<TypedDependency> list) {
// Man oh man, how gnarly is the logic of this method....
Collection<TypedDependency> newTypedDeps = new ArrayList<>();
// Construct a map from tree nodes to the set of typed
// dependencies in which the node appears as governor.
// cdm: could use CollectionValuedMap here!
Map<IndexedWord, SortedSet<TypedDependency>> map = Generics.newHashMap();
List<IndexedWord> vmod = Generics.newArrayList();
for (TypedDependency typedDep : list) {
if (!map.containsKey(typedDep.gov())) {
map.put(typedDep.gov(), new TreeSet<>());
}
map.get(typedDep.gov()).add(typedDep);
if (typedDep.reln() == VERBAL_MODIFIER) {
// look for aux deps which indicate this was a to-be verb
boolean foundAux = false;
for (TypedDependency auxDep : list) {
if (auxDep.reln() != AUX_MODIFIER) {
continue;
}
if (!auxDep.gov().equals(typedDep.dep()) || !auxDep.dep().value().equalsIgnoreCase("to")) {
continue;
}
foundAux = true;
break;
}
if (!foundAux) {
vmod.add(typedDep.dep());
}
}
}
// System.err.println("here's the vmod list: " + vmod);
// Do preposition conjunction interaction for
// governor p NP and p NP case ... a lot of special code cdm jan 2006
for (TypedDependency td1 : list) {
if (td1.reln() != PREPOSITIONAL_MODIFIER) {
continue;
}
IndexedWord td1Dep = td1.dep();
SortedSet<TypedDependency> possibles = map.get(td1Dep);
if (possibles == null) {
continue;
}
// look for the "second half"
// unique: the head prep and whether it should be pobj
Pair<TypedDependency, Boolean> prepDep = null;
TypedDependency ccDep = null; // treat as unique
// list of dep and prepOtherDep and pobj (or pcomp)
List<Triple<TypedDependency, TypedDependency, Boolean>> conjs = new ArrayList<>();
Set<TypedDependency> otherDtrs = new TreeSet<>();
// first look for a conj(prep, prep) (there might be several conj relations!!!)
boolean samePrepositionInEachConjunct = true;
int conjIndex = -1;
for (TypedDependency td2 : possibles) {
if (td2.reln() == CONJUNCT) {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
if (td2DepPOS.equals("IN") || td2DepPOS.equals("TO")) {
samePrepositionInEachConjunct = samePrepositionInEachConjunct && td2Dep.value().equals(td1Dep.value());
Set<TypedDependency> possibles2 = map.get(td2Dep);
boolean pobj = true;// default of collapsing preposition is prep_
TypedDependency prepOtherDep = null;
if (possibles2 != null) {
for (TypedDependency td3 : possibles2) {
IndexedWord td3Dep = td3.dep();
String td3DepPOS = td3Dep.tag();
// CDM Mar 2006: I put in disjunction here when I added in
// PREPOSITIONAL_OBJECT. If it catches all cases, we should
// be able to delete the DEPENDENT disjunct
// maybe better to delete the DEPENDENT disjunct - it creates
// problem with multiple prep (mcdm)
if ((td3.reln() == PREPOSITIONAL_OBJECT || td3.reln() == PREPOSITIONAL_COMPLEMENT) && (!(td3DepPOS.equals("IN") || td3DepPOS.equals("TO"))) && prepOtherDep == null) {
prepOtherDep = td3;
if (td3.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = false;
}
} else {
otherDtrs.add(td3);
}
}
}
if (conjIndex < td2Dep.index()) {
conjIndex = td2Dep.index();
}
conjs.add(new Triple<>(td2, prepOtherDep, pobj));
}
}
} // end td2:possibles
if (conjs.isEmpty()) {
continue;
}
// if we have a conj under a preposition dependency, we look for the other
// parts
String td1DepPOS = td1Dep.tag();
for (TypedDependency td2 : possibles) {
// we look for the cc linked to this conjDep
// the cc dep must have an index smaller than the dep of conjDep
if (td2.reln() == COORDINATION && td2.dep().index() < conjIndex) {
ccDep = td2;
} else {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
// System.err.println("prepDep find: td1.reln: " + td1.reln() +
// "; td2.reln: " + td2.reln() + "; td1DepPos: " + td1DepPOS +
// "; td2DepPos: " + td2DepPOS + "; index " + index +
// "; td2.dep().index(): " + td2.dep().index());
if ((td2.reln() == DEPENDENT || td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && prepDep == null && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO")))) {
// same index trick, in case we have multiple deps
// I deleted this to see if it helped [cdm Jan 2010] &&
// td2.dep().index() < index)
prepDep = new Pair<>(td2, td2.reln() != PREPOSITIONAL_COMPLEMENT);
} else if (!inConjDeps(td2, conjs)) {// don't want to add the conjDep
// again!
otherDtrs.add(td2);
}
}
}
if (prepDep == null || ccDep == null) {
continue; // we can't deal with it in the hairy prep/conj interaction case!
}
if (DEBUG) {
// ccDep must be non-null given test above
System.err.println("!! Conj and prep case:");
System.err.println(" td1 (prep): " + td1);
System.err.println(" Kids of td1 are: " + possibles);
System.err.println(" prepDep: " + prepDep);
System.err.println(" ccDep: " + ccDep);
System.err.println(" conjs: " + conjs);
System.err.println(" samePrepositionInEachConjunct: " + samePrepositionInEachConjunct);
System.err.println(" otherDtrs: " + otherDtrs);
}
// check if we have the same prepositions in the conjunction
if (samePrepositionInEachConjunct) { // conjDep != null && prepOtherDep !=
// null &&
// OK, we have a conjunction over parallel PPs: Fred flew to Greece and
// to Serbia.
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
newTypedDeps.add(tdNew);
if (DEBUG) {
System.err.println("PrepPoss Conj branch (two parallel PPs) adding: " + tdNew);
System.err.println(" removing: " + td1 + " " + prepDep + " " + ccDep);
}
td1.setReln(KILL);// remember these are "used up"
prepDep.first().setReln(KILL);
ccDep.setReln(KILL);
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
TypedDependency conjDep = trip.first();
TypedDependency prepOtherDep = trip.second();
if (prepOtherDep == null) {
// CDM July 2010: I think this should only ever happen if there is a
// misparse, but it has happened in such circumstances. You have
// something like (PP in or in (NP Serbia)), with the two
// prepositions the same. We just clean up the mess.
if (DEBUG) {
System.err.println(" apparent misparse: same P twice with only one NP object (prepOtherDep is null)");
System.err.println(" removing: " + conjDep);
}
ccDep.setReln(KILL);
} else {
TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), prepDep.first().dep(), prepOtherDep.dep());
newTypedDeps.add(tdNew2);
if (DEBUG) {
System.err.println(" adding: " + tdNew2);
System.err.println(" removing: " + conjDep + " " + prepOtherDep);
}
prepOtherDep.setReln(KILL);
}
conjDep.setReln(KILL);
}
// promote dtrs that would be orphaned
for (TypedDependency otd : otherDtrs) {
if (DEBUG) {
System.err.print("Changed " + otd);
}
otd.setGov(td1.gov());
if (DEBUG) {
System.err.println(" to " + otd);
}
}
// Now we need to see if there are any TDs that will be "orphaned"
// by this collapse. Example: if we have:
// dep(drew, on)
// dep(on, book)
// dep(on, right)
// the first two will be collapsed to on(drew, book), but then
// the third one will be orphaned, since its governor no
// longer appears. So, change its governor to 'drew'.
// CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now
// it does, since they're not automatically deleted
// Some things in possibles may have already been changed, so check gov
if (DEBUG) {
System.err.println("td1: " + td1 + "; possibles: " + possibles);
}
for (TypedDependency td2 : possibles) {
// if (DEBUG) {
// System.err.println("[a] td2.reln " + td2.reln() + " td2.gov " +
// td2.gov() + " td1.dep " + td1.dep());
// }
if (td2.reln() != KILL && td2.gov().equals(td1.dep())) { // && td2.reln()
// != COORDINATION
// && td2.reln()
// != CONJUNCT
if (DEBUG) {
System.err.println("Changing " + td2 + " to have governor of " + td1 + " [a]");
}
td2.setGov(td1.gov());
}
}
continue; // This one has been dealt with successfully
} // end same prepositions
// case of "Lufthansa flies to and from Serbia". Make it look like next
// case :-)
// that is, the prepOtherDep should be the same as prepDep !
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
if (trip.first() != null && trip.second() == null) {
trip.setSecond(new TypedDependency(prepDep.first().reln(), trip.first().dep(), prepDep.first().dep()));
trip.setThird(prepDep.second());
}
}
// we have two different prepositions in the conjunction
// in this case we need to add a node
// "Bill jumped over the fence and through the hoop"
// prep_over(jumped, fence)
// conj_and(jumped, jumped)
// prep_through(jumped, hoop)
// Extra complication:
// If "jumped" is already part of a conjunction, we should add the new one off that rather than chaining
IndexedWord conjHead = td1.gov();
for (TypedDependency td3 : list) {
if (td3.dep().equals(td1.gov()) && td3.reln().equals(CONJUNCT)) {
conjHead = td3.gov();
}
}
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
newTypedDeps.add(tdNew);
if (DEBUG) {
System.err.println("ConjPP (different preps) adding: " + tdNew);
System.err.println(" deleting: " + td1 + " " + prepDep.first() + " " + ccDep);
}
td1.setReln(KILL);// remember these are "used up"
prepDep.first().setReln(KILL);
ccDep.setReln(KILL);
// so far we added the first prep grammatical relation
int copyNumber = 1;
for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
TypedDependency conjDep = trip.first();
TypedDependency prepOtherDep = trip.second();
boolean pobj = trip.third();
// OK, we have a conjunction over different PPs
// we create a new node;
// in order to make a distinction between the original node and its copy
// we set the "copyCount" variable in the IndexedWord
// existence of copyCount > 0 is checked at printing (toString method of
// TypedDependency)
IndexedWord label = td1.gov().makeSoftCopy(copyNumber);
copyNumber++;
// now we add the conjunction relation between conjHead (either td1.gov
// or what it is itself conjoined with) and the copy
// the copy has the same label as td1.gov() but is another TreeGraphNode
// todo: Or that's the plan; there are a couple of knock on changes to fix before we can do this!
// TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), conjHead, label);
TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), td1.gov(), label);
newTypedDeps.add(tdNew2);
// now we still need to add the second prep grammatical relation
// between the copy and the dependent of the prepOtherDep node
TypedDependency tdNew3;
GrammaticalRelation reln2 = determinePrepRelation(map, vmod, conjDep, td1, pobj);
tdNew3 = new TypedDependency(reln2, label, prepOtherDep.dep());
newTypedDeps.add(tdNew3);
if (DEBUG) {
System.err.println(" adding: " + tdNew2 + " " + tdNew3);
System.err.println(" deleting: " + conjDep + " " + prepOtherDep);
}
conjDep.setReln(KILL);
prepOtherDep.setReln(KILL);
// promote dtrs that would be orphaned
for (TypedDependency otd : otherDtrs) {
// special treatment for prepositions: the original relation is
// likely to be a "dep" and we want this to be a "prep"
if (otd.dep().tag().equals("IN")) {
otd.setReln(PREPOSITIONAL_MODIFIER);
}
otd.setGov(td1.gov());
}
}
// Now we need to see if there are any TDs that will be "orphaned" off
// the first preposition
// by this collapse. Example: if we have:
// dep(drew, on)
// dep(on, book)
// dep(on, right)
// the first two will be collapsed to on(drew, book), but then
// the third one will be orphaned, since its governor no
// longer appears. So, change its governor to 'drew'.
// CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now
// it does, since they're not automatically deleted
for (TypedDependency td2 : possibles) {
if (td2.reln() != KILL) { // && td2.reln() != COORDINATION &&
// td2.reln() != CONJUNCT) {
if (DEBUG) {
System.err.println("Changing " + td2 + " to have governor of " + td1 + " [b]");
}
td2.setGov(td1.gov());
}
}
// end for different prepositions
} // for TypedDependency td1 : list
// below here is the single preposition/possessor basic case!!
for (TypedDependency td1 : list) {
if (td1.reln() == KILL) {
continue;
}
IndexedWord td1Dep = td1.dep();
String td1DepPOS = td1Dep.tag();
// find all other typedDeps having our dep as gov
Set<TypedDependency> possibles = map.get(td1Dep);
if (possibles != null && (td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
// look for the "second half"
boolean pobj = true;// default for prep relation is prep_
for (TypedDependency td2 : possibles) {
if (td2.reln() != COORDINATION && td2.reln() != CONJUNCT) {
IndexedWord td2Dep = td2.dep();
String td2DepPOS = td2Dep.tag();
if ((td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
if (td2.reln() == POSSESSIVE_MODIFIER) {
if ( ! map.containsKey(td2Dep)) { // if 's has no kids of its own (it shouldn't!)
td2.setReln(KILL);
}
}
} else if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO"))) && !isConjWithNoPrep(td2.gov(), possibles)) {
// we don't collapse preposition conjoined with a non-preposition
// to avoid disconnected constituents
// OK, we have a pair td1, td2 to collapse to td3
if (DEBUG) {
System.err.println("(Single prep/poss base case collapsing " + td1 + " and " + td2);
}
// check whether we are in a pcomp case:
if (td2.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = false;
}
GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, pobj);
TypedDependency td3 = new TypedDependency(reln, td1.gov(), td2.dep());
if (DEBUG) {
System.err.println("PP adding: " + td3 + " deleting: " + td1 + ' ' + td2);
}
// add it to map to deal with recursive cases like "achieved this (PP (PP in part) with talent)"
map.get(td3.gov()).add(td3);
newTypedDeps.add(td3);
td1.setReln(KILL);// remember these are "used up"
td2.setReln(KILL);// remember these are "used up"
}
}
} // for TypedDependency td2
}
// Now we need to see if there are any TDs that will be "orphaned"
// by this collapse. Example: if we have:
// dep(drew, on)
// dep(on, book)
// dep(on, right)
// the first two will be collapsed to on(drew, book), but then
// the third one will be orphaned, since its governor no
// longer appears. So, change its governor to 'drew'.
// CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now
// it does, since they're not automatically deleted
if (possibles != null && td1.reln() == KILL) {
for (TypedDependency td2 : possibles) {
if (td2.reln() != KILL) { // && td2.reln() != COORDINATION &&
// td2.reln() != CONJUNCT) {
if (DEBUG) {
System.err.println("Changing " + td2 + " to have governor of " + td1 + " [c]");
}
td2.setGov(td1.gov());
}
}
}
} // for TypedDependency td1
// now remove typed dependencies with reln "kill" and add new ones.
for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext();) {
TypedDependency td = iter.next();
if (td.reln() == KILL) {
if (DEBUG) {
System.err.println("Removing dep killed in poss/prep (conj) collapse: " + td);
}
iter.remove();
}
}
list.addAll(newTypedDeps);
} // end collapsePrepAndPoss()
/** Work out prep relation name. pc is the dependency whose dep() is the
* preposition to do a name for. topPrep may be the same or different.
* Among the daughters of its gov is where to look for an auxpass.
*/
private static GrammaticalRelation determinePrepRelation(Map<IndexedWord, ? extends Set<TypedDependency>> map, List<IndexedWord> vmod, TypedDependency pc, TypedDependency topPrep, boolean pobj) {
// handling the case of an "agent":
// the governor of a "by" preposition must have an "auxpass" dependency
// or be the dependent of a "vmod" relation
// if it is the case, the "agent" variable becomes true
boolean agent = false;
String preposition = pc.dep().value().toLowerCase();
if (preposition.equals("by")) {
// look if we have an auxpass
Set<TypedDependency> aux_pass_poss = map.get(topPrep.gov());
if (aux_pass_poss != null) {
for (TypedDependency td_pass : aux_pass_poss) {
if (td_pass.reln() == AUX_PASSIVE_MODIFIER) {
agent = true;
}
}
}
// look if we have a vmod
if (!vmod.isEmpty() && vmod.contains(topPrep.gov())) {
agent = true;
}
}
GrammaticalRelation reln;
if (agent) {
reln = AGENT;
} else {
// for prepositions, use the preposition
// for pobj: we collapse into "prep"; for pcomp: we collapse into "prepc"
if (pobj) {
reln = EnglishGrammaticalRelations.getPrep(preposition);
} else {
reln = EnglishGrammaticalRelations.getPrepC(preposition);
}
}
return reln;
}
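// Worked example (assumed basic analysis): for "was bitten by the dog", the governor
// of the "by" PP has an auxpass, so the PP collapses to agent(bitten, dog); without an
// auxpass (or vmod) governor, "by" collapses to an ordinary prep_by, or prepc_by when
// the complement is a pcomp clause.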
// used by collapse2WP(), collapseFlatMWP(), collapse2WPbis() KEPT IN
// ALPHABETICAL ORDER
private static final String[][] MULTIWORD_PREPS = { { "according", "to" }, { "across", "from" }, { "ahead", "of" }, { "along", "with" }, { "alongside", "of" }, { "apart", "from" }, { "as", "for" }, { "as", "from" }, { "as", "of" }, { "as", "per" }, { "as", "to" }, { "aside", "from" }, { "away", "from" }, { "based", "on" }, { "because", "of" }, { "close", "by" }, { "close", "to" }, { "contrary", "to" }, { "compared", "to" }, { "compared", "with" }, { "due", "to" }, { "depending", "on" }, { "except", "for" }, { "exclusive", "of" }, { "far", "from" }, { "followed", "by" }, { "inside", "of" }, { "instead", "of" }, { "irrespective", "of" }, { "next", "to" }, { "near", "to" }, { "off", "of" }, { "out", "of" }, { "outside", "of" }, { "owing", "to" }, { "preliminary", "to" },
{ "preparatory", "to" }, { "previous", "to" }, { "prior", "to" }, { "pursuant", "to" }, { "regardless", "of" }, { "subsequent", "to" }, { "such", "as" }, { "thanks", "to" }, { "together", "with" } };
// used by collapse3WP() KEPT IN ALPHABETICAL ORDER
private static final String[][] THREEWORD_PREPS = { { "by", "means", "of" }, { "in", "accordance", "with" }, { "in", "addition", "to" }, { "in", "case", "of" }, { "in", "front", "of" }, { "in", "lieu", "of" }, { "in", "place", "of" }, { "in", "spite", "of" }, { "on", "account", "of" }, { "on", "behalf", "of" }, { "on", "top", "of" }, { "with", "regard", "to" }, { "with", "respect", "to" } };
/**
* Given a list of typedDependencies, returns true if the node "node" is the
* governor of a conj relation with a dependent which is not a preposition
*
* @param node
* A node in this GrammaticalStructure
* @param list
* A list of typedDependencies
* @return true If node is the governor of a conj relation in the list with
* the dep not being a preposition
*/
private static boolean isConjWithNoPrep(IndexedWord node, Collection<TypedDependency> list) {
for (TypedDependency td : list) {
if (td.gov().equals(node) && td.reln() == CONJUNCT) {
// we have a conjunct
// check the POS of the dependent
String tdDepPOS = td.dep().tag();
if (!(tdDepPOS.equals("IN") || tdDepPOS.equals("TO"))) {
return true;
}
}
}
return false;
}
/**
* Collapse multiword prepositions of the following format:
* prep|advmod|dep|amod(gov, mwp[0])
* dep(mwp[0], mwp[1])
* pobj|pcomp(mwp[1], compl) or pobj|pcomp(mwp[0], compl)
* -> prep_mwp[0]_mwp[1](gov, compl)
*
* prep|advmod|dep|amod(gov, mwp[1])
* dep(mwp[1], mwp[0])
* pobj|pcomp(mwp[1], compl) or pobj|pcomp(mwp[0], compl)
* -> prep_mwp[0]_mwp[1](gov, compl)
*
*
* The collapsing has to be done at once in order to know exactly which node
* is the gov and the dep of the multiword preposition. Otherwise this can
* lead to problems: removing a non-multiword "to" preposition for example!!!
* This method replaces the old "collapsedMultiWordPreps"
*
* @param list
* list of typedDependencies to work on
*/
private static void collapse2WP(Collection<TypedDependency> list) {
Collection<TypedDependency> newTypedDeps = new ArrayList<>();
for (String[] mwp : MULTIWORD_PREPS) {
// first look for patterns such as:
// X(gov, mwp[0])
// Y(mwp[0],mwp[1])
// Z(mwp[1], compl) or Z(mwp[0], compl)
// -> prep_mwp[0]_mwp[1](gov, compl)
collapseMultiWordPrep(list, newTypedDeps, mwp[0], mwp[1], mwp[0], mwp[1]);
// now look for patterns such as:
// X(gov, mwp[1])
// Y(mwp[1],mwp[0])
// Z(mwp[1], compl) or Z(mwp[0], compl)
// -> prep_mwp[0]_mwp[1](gov, compl)
collapseMultiWordPrep(list, newTypedDeps, mwp[0], mwp[1], mwp[1], mwp[0]);
}
}
/**
* Collapse multiword prepositions of the following format:
* prep|advmod|dep|amod(gov, mwp0) dep(mwp0, mwp1) pobj|pcomp(mwp1, compl) or
* pobj|pcomp(mwp0, compl) -> prep_mwp0_mwp1(gov, compl)
*
*
* @param list List of typedDependencies to work on,
* @param newTypedDeps List of typedDependencies that we construct
* @param str_mwp0 First part of the multiword preposition to construct the collapsed
* preposition
* @param str_mwp1 Second part of the multiword preposition to construct the
* collapsed preposition
* @param w_mwp0 First part of the multiword preposition that we look for
* @param w_mwp1 Second part of the multiword preposition that we look for
*/
private static void collapseMultiWordPrep(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String str_mwp0, String str_mwp1, String w_mwp0, String w_mwp1) {
// first find the multiword preposition: dep(mwp[0], mwp[1])
// the two words should be next to one another in the sentence (difference of
// indexes = 1)
IndexedWord mwp0 = null;
IndexedWord mwp1 = null;
TypedDependency dep = null;
for (TypedDependency td : list) {
if (td.gov().value().equalsIgnoreCase(w_mwp0) && td.dep().value().equalsIgnoreCase(w_mwp1) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
mwp0 = td.gov();
mwp1 = td.dep();
dep = td;
}
}
if (mwp0 == null) {
return;
}
// now search for prep|advmod|dep|amod(gov, mwp0)
IndexedWord governor = null;
TypedDependency prep = null;
for (TypedDependency td1 : list) {
if ((td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == ADVERBIAL_MODIFIER || td1.reln() == ADJECTIVAL_MODIFIER || td1.reln() == DEPENDENT || td1.reln() == MULTI_WORD_EXPRESSION) && td1.dep().equals(mwp0)) {
// we found prep|advmod|dep|amod(gov, mwp0)
prep = td1;
governor = prep.gov();
}
}
if (prep == null) {
return;
}
// search for the complement: pobj|pcomp(mwp1,X)
// or for pobj|pcomp(mwp0,X)
// There may be more than one in weird constructions; if there are several,
// take the one with the LOWEST index!
TypedDependency pobj = null;
TypedDependency newtd = null;
for (TypedDependency td2 : list) {
if ((td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td2.gov().equals(mwp1) || td2.gov().equals(mwp0))) {
if (pobj == null || pobj.dep().index() > td2.dep().index()) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr;
if (td2.reln() == PREPOSITIONAL_COMPLEMENT) {
gr = EnglishGrammaticalRelations.getPrepC(str_mwp0 + '_' + str_mwp1);
} else {
gr = EnglishGrammaticalRelations.getPrep(str_mwp0 + '_' + str_mwp1);
}
if (governor != null) {
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
}
}
if (pobj == null || newtd == null) {
return;
}
// only if we found the three parts, set to KILL and remove
// and add the new one
// Necessarily from the above: prep != null, dep != null, pobj != null, newtd != null
if (DEBUG) {
System.err.println("Removing " + prep + ", " + dep + ", and " + pobj);
System.err.println(" and adding " + newtd);
}
prep.setReln(KILL);
dep.setReln(KILL);
pobj.setReln(KILL);
newTypedDeps.add(newtd);
// now remove typed dependencies with reln "kill"
// and promote possible orphans
for (TypedDependency td1 : list) {
if (td1.reln() != KILL) {
if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1)) {
// CDM: Thought of adding this in Jan 2010, but it causes
// conflicting relations tmod vs. pobj. Needs more thought
// maybe restrict pobj to first NP in PP, and allow tmod for a later
// one?
if (td1.reln() == TEMPORAL_MODIFIER) {
// special case when an extra NP-TMP is buried in a PP for
// "during the same period last year"
td1.setGov(pobj.dep());
} else {
td1.setGov(governor);
}
}
if (!newTypedDeps.contains(td1)) {
newTypedDeps.add(td1);
}
}
}
list.clear();
list.addAll(newTypedDeps);
}
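// Worked example (assumed basic analysis) for "He resigned because of the scandal":
// given prep(resigned, because) (or advmod/dep/amod), dep(because, of) and
// pobj(of, scandal), the three dependencies are killed and
// prep_because_of(resigned, scandal) is added.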
/**
* Collapse multi-word prepositions of the following format: advmod|prt(gov,
* mwp[0]) prep(gov,mwp[1]) pobj|pcomp(mwp[1], compl) ->
* prep_mwp[0]_mwp[1](gov, compl)
*
*
* @param list
* List of typedDependencies to work on
*/
private static void collapse2WPbis(Collection<TypedDependency> list) {
Collection<TypedDependency> newTypedDeps = new ArrayList<>();
for (String[] mwp : MULTIWORD_PREPS) {
newTypedDeps.clear();
IndexedWord mwp0 = null;
IndexedWord mwp1 = null;
IndexedWord governor = null;
TypedDependency prep = null;
TypedDependency dep = null;
TypedDependency pobj = null;
TypedDependency newtd = null;
// first find the first part of the multi_preposition: advmod|prt(gov, mwp[0])
for (TypedDependency td : list) {
if (td.dep().value().equalsIgnoreCase(mwp[0]) && (td.reln() == PHRASAL_VERB_PARTICLE || td.reln() == ADVERBIAL_MODIFIER || td.reln() == DEPENDENT || td.reln() == MULTI_WORD_EXPRESSION)) {
// we found advmod(gov, mwp0) or prt(gov, mwp0)
governor = td.gov();
mwp0 = td.dep();
dep = td;
}
}
// now search for the second part: prep(gov, mwp1)
// the two words in the mwp should be next to one another in the sentence
// (difference of indexes = 1)
if (mwp0 == null || governor == null) {
continue;
}
for (TypedDependency td1 : list) {
if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td1.dep().index() - mwp0.index()) == 1 && td1.gov().equals(governor)) {
// we found prep(gov, mwp1)
mwp1 = td1.dep();
prep = td1;
}
}
if (mwp1 == null) {
continue;
}
// search for the complement: pobj|pcomp(mwp1,X)
for (TypedDependency td2 : list) {
if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(mwp1)) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1]);
newtd = new TypedDependency(gr, governor, pobj.dep());
}
if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(mwp1)) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1]);
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
if (pobj == null) {
return;
}
// only if we found the three parts, set to KILL and remove
// and add the new one
// now prep != null, pobj != null and newtd != null
prep.setReln(KILL);
dep.setReln(KILL);
pobj.setReln(KILL);
newTypedDeps.add(newtd);
// now remove typed dependencies with reln "kill"
// and promote possible orphans
for (TypedDependency td1 : list) {
if (td1.reln() != KILL) {
if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1)) {
td1.setGov(governor);
}
if (!newTypedDeps.contains(td1)) {
newTypedDeps.add(td1);
}
}
}
list.clear();
list.addAll(newTypedDeps);
}
}
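  // Illustrative sketch only (not called by the pipeline): exercises
  // collapse2WPbis() on the basic dependencies of a hand-written bracketing of
  // "He went along with the plan." If the basic dependencies come out as
  // advmod(went, along), prep(went, with) and pobj(with, plan), the call below
  // should replace them with prep_along_with(went, plan). Both the bracketing
  // and the expected basic dependencies are assumptions made for illustration.
  private static void demoCollapse2WPbis() {
    Tree tree = Tree.valueOf("(ROOT (S (NP (PRP He)) (VP (VBD went) (ADVP (RB along)) (PP (IN with) (NP (DT the) (NN plan)))) (. .)))");
    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
    List<TypedDependency> deps = new ArrayList<TypedDependency>(gs.typedDependencies());
    collapse2WPbis(deps);
    for (TypedDependency td : deps) {
      System.err.println(td);
    }
  }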
/**
   * Collapse 3-word prepositions of the following format, which arises when
   * the preposition is analyzed as an NP:
   * prep(gov, mwp0)
   * X(mwp0, mwp1)
   * X(mwp1, mwp2)
   * pobj|pcomp(mwp2, compl)
   * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
   *
   * It also takes flat annotation into account:
   * prep(gov, mwp0)
   * X(mwp0, mwp1)
   * X(mwp0, mwp2)
   * pobj|pcomp(mwp0, compl)
   * -> prep_mwp[0]_mwp[1]_mwp[2](gov, compl)
   *
   * An illustrative usage sketch follows the method body.
   *
   * @param list List of typedDependencies to work on
   */
  private static void collapse3WP(Collection<TypedDependency> list) {
    Collection<TypedDependency> newTypedDeps = new ArrayList<TypedDependency>();
// first, loop over the prepositions for NP annotation
for (String[] mwp : THREEWORD_PREPS) {
newTypedDeps.clear();
IndexedWord mwp0 = null;
IndexedWord mwp1 = null;
IndexedWord mwp2 = null;
TypedDependency dep1 = null;
TypedDependency dep2 = null;
      // first find the first part of the 3-word preposition: dep(mwp[0], mwp[1])
      // the two words should be adjacent in the sentence (difference of
      // indexes = 1)
for (TypedDependency td : list) {
if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
mwp0 = td.gov();
mwp1 = td.dep();
dep1 = td;
}
}
      // find the second part of the 3-word preposition: dep(mwp[1], mwp[2])
      // the two words should be adjacent in the sentence (difference of
      // indexes = 1)
for (TypedDependency td : list) {
if (td.gov().equals(mwp1) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
mwp2 = td.dep();
dep2 = td;
}
}
if (dep1 != null && dep2 != null) {
// now search for prep(gov, mwp0)
IndexedWord governor = null;
TypedDependency prep = null;
for (TypedDependency td1 : list) {
          if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().equals(mwp0)) {
            // we found prep(gov, mwp0)
prep = td1;
governor = prep.gov();
}
}
// search for the complement: pobj|pcomp(mwp2,X)
TypedDependency pobj = null;
TypedDependency newtd = null;
for (TypedDependency td2 : list) {
if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(mwp2)) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
if (governor != null) {
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(mwp2)) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
if (governor != null) {
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
}
// only if we found the governor and complement parts, set to KILL and
// remove
// and add the new one
if (prep != null && pobj != null && newtd != null) {
prep.setReln(KILL);
dep1.setReln(KILL);
dep2.setReln(KILL);
pobj.setReln(KILL);
newTypedDeps.add(newtd);
// now remove typed dependencies with reln "kill"
// and promote possible orphans
for (TypedDependency td1 : list) {
if (td1.reln() != KILL) {
if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
td1.setGov(governor);
}
if (!newTypedDeps.contains(td1)) {
newTypedDeps.add(td1);
}
}
}
list.clear();
list.addAll(newTypedDeps);
}
}
}
// second, loop again looking at flat annotation
for (String[] mwp : THREEWORD_PREPS) {
newTypedDeps.clear();
IndexedWord mwp0 = null;
IndexedWord mwp1 = null;
IndexedWord mwp2 = null;
TypedDependency dep1 = null;
TypedDependency dep2 = null;
      // first find the first part of the 3-word preposition: dep(mwp[0], mwp[1])
      // the two words should be adjacent in the sentence (difference of
      // indexes = 1)
for (TypedDependency td : list) {
if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
mwp0 = td.gov();
mwp1 = td.dep();
dep1 = td;
}
}
      // find the second part of the 3-word preposition: dep(mwp[0], mwp[2])
      // the two words should be one word apart in the sentence (difference of
      // indexes = 2)
for (TypedDependency td : list) {
if (td.gov().equals(mwp0) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == 2) {
mwp2 = td.dep();
dep2 = td;
}
}
if (dep1 != null && dep2 != null) {
// now search for prep(gov, mwp0)
IndexedWord governor = null;
TypedDependency prep = null;
for (TypedDependency td1 : list) {
          if (td1.dep().equals(mwp0) && td1.reln() == PREPOSITIONAL_MODIFIER) {
            // we found prep(gov, mwp0)
prep = td1;
governor = prep.gov();
}
}
// search for the complement: pobj|pcomp(mwp0,X)
TypedDependency pobj = null;
TypedDependency newtd = null;
for (TypedDependency td2 : list) {
if (td2.gov().equals(mwp0) && td2.reln() == PREPOSITIONAL_OBJECT) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
if (governor != null) {
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
if (td2.gov().equals(mwp0) && td2.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
if (governor != null) {
newtd = new TypedDependency(gr, governor, pobj.dep());
}
}
}
// only if we found the governor and complement parts, set to KILL and
// remove
// and add the new one
if (prep != null && pobj != null && newtd != null) {
prep.setReln(KILL);
dep1.setReln(KILL);
dep2.setReln(KILL);
pobj.setReln(KILL);
newTypedDeps.add(newtd);
// now remove typed dependencies with reln "kill"
// and promote possible orphans
for (TypedDependency td1 : list) {
if (td1.reln() != KILL) {
if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
td1.setGov(governor);
}
if (!newTypedDeps.contains(td1)) {
newTypedDeps.add(td1);
}
}
}
list.clear();
list.addAll(newTypedDeps);
}
}
}
}
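  // Illustrative sketch only: exercises collapse3WP() on the basic
  // dependencies of a hand-written bracketing in which "in front of" is
  // analyzed as an NP-internal structure. If the basic dependencies come out
  // as prep(stopped, in), pobj(in, front), prep(front, of) and
  // pobj(of, house), the call below should rewrite them into a single
  // prep_in_front_of(stopped, house). The bracketing and the expected basic
  // dependencies are assumptions made for illustration.
  private static void demoCollapse3WP() {
    Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN car)) (VP (VBD stopped) (PP (IN in) (NP (NP (NN front)) (PP (IN of) (NP (DT the) (NN house)))))) (. .)))");
    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
    List<TypedDependency> deps = new ArrayList<TypedDependency>(gs.typedDependencies());
    collapse3WP(deps);
    for (TypedDependency td : deps) {
      System.err.println(td);
    }
  }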
/*
*
* While upgrading, here are some lists of common multiword prepositions which
* we might try to cover better. (Also do corpus counts for same?)
*
* (Prague Dependency Treebank) as per CRIT except for RESTR but for RESTR
* apart from RESTR away from RESTR aside from RESTR as from TSIN ahead of
* TWHEN back of LOC, DIR3 exclusive of* RESTR instead of SUBS outside of LOC,
* DIR3 off of DIR1 upwards of LOC, DIR3 as of TSIN because of CAUS inside of
* LOC, DIR3 irrespective of REG out of LOC, DIR1 regardless of REG according
* to CRIT due to CAUS next to LOC, RESTR owing to* CAUS preparatory to* TWHEN
* prior to* TWHEN subsequent to* TWHEN as to/for REG contrary to* CPR close
* to* LOC, EXT (except the case named in the next table) near to LOC, DIR3
* nearer to LOC, DIR3 preliminary to* TWHEN previous to* TWHEN pursuant to*
* CRIT thanks to CAUS along with ACMP together with ACMP devoid of* ACMP void
* of* ACMP
*
* http://www.keepandshare.com/doc/view.php?u=13166
*
* according to ahead of as far as as well as by means of due to far from in
* addition to in case of in front of in place of in spite of inside of
* instead of in to (into) near to next to on account of on behalf of on top
* of on to (onto) out of outside of owing to prior to with regards to
*
* www.eslmonkeys.com/book/learner/prepositions.pdf According to Ahead of
* Along with Apart from As for As to Aside from Because of But for Contrary
* to Except for Instead of Next to Out of Prior to Thanks to
*/
/**
   * Collapse multi-word prepositions of the following format, which comes from
   * flat annotation. This handles e.g., "because of" (PP (IN because) (IN of)
   * ...), "such as" (PP (JJ such) (IN as) ...)
   *
   * prep(gov, mwp[1]) dep(mwp[1], mwp[0]) pobj(mwp[1], compl) ->
   * prep_mwp[0]_mwp[1](gov, compl)
   *
   * An illustrative usage sketch follows the method body.
   *
   * @param list List of typedDependencies to work on
   */
  private static void collapseFlatMWP(Collection<TypedDependency> list) {
    Collection<TypedDependency> newTypedDeps = new ArrayList<TypedDependency>();
for (String[] mwp : MULTIWORD_PREPS) {
newTypedDeps.clear();
IndexedWord mwp1 = null;
IndexedWord governor = null;
TypedDependency prep = null;
TypedDependency dep = null;
TypedDependency pobj = null;
      // first find the multiword preposition: dep(mwp[1], mwp[0])
for (TypedDependency td : list) {
if (Math.abs(td.gov().index() - td.dep().index()) == 1 && td.gov().value().equalsIgnoreCase(mwp[1]) && td.dep().value().equalsIgnoreCase(mwp[0])) {
mwp1 = td.gov();
dep = td;
}
}
if (mwp1 == null) {
continue;
}
// now search for prep(gov, mwp1)
for (TypedDependency td1 : list) {
if (td1.dep().equals(mwp1) && td1.reln() == PREPOSITIONAL_MODIFIER) {
// we found prep(gov, mwp1)
prep = td1;
governor = prep.gov();
}
}
if (prep == null) {
continue;
}
// search for the complement: pobj|pcomp(mwp1,X)
for (TypedDependency td2 : list) {
if (td2.gov().equals(mwp1) && td2.reln() == PREPOSITIONAL_OBJECT) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1]);
newTypedDeps.add(new TypedDependency(gr, governor, pobj.dep()));
}
if (td2.gov().equals(mwp1) && td2.reln() == PREPOSITIONAL_COMPLEMENT) {
pobj = td2;
// create the new gr relation
GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1]);
newTypedDeps.add(new TypedDependency(gr, governor, pobj.dep()));
}
}
if (pobj == null) {
return;
}
      // we found all three parts: set the old dependencies to KILL
      // at this point prep != null, dep != null and pobj != null
prep.setReln(KILL);
dep.setReln(KILL);
pobj.setReln(KILL);
// now remove typed dependencies with reln "kill"
// and promote possible orphans
for (TypedDependency td1 : list) {
if (td1.reln() != KILL) {
if (td1.gov().equals(mwp1)) {
td1.setGov(governor);
}
if (!newTypedDeps.contains(td1)) {
newTypedDeps.add(td1);
}
}
}
list.clear();
list.addAll(newTypedDeps);
}
}
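  // Illustrative sketch only: exercises collapseFlatMWP() on the basic
  // dependencies of a hand-written flat bracketing of "He resigned because of
  // the scandal." If, as described in the javadoc above, the basic
  // dependencies contain prep(resigned, of), dep(of, because) and
  // pobj(of, scandal), the call below should collapse them into
  // prep_because_of(resigned, scandal). The bracketing and the expected basic
  // dependencies are assumptions made for illustration.
  private static void demoCollapseFlatMWP() {
    Tree tree = Tree.valueOf("(ROOT (S (NP (PRP He)) (VP (VBD resigned) (PP (IN because) (IN of) (NP (DT the) (NN scandal)))) (. .)))");
    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
    List<TypedDependency> deps = new ArrayList<TypedDependency>(gs.typedDependencies());
    collapseFlatMWP(deps);
    for (TypedDependency td : deps) {
      System.err.println(td);
    }
  }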
/**
   * This method gets rid of multiwords in conjunctions to avoid having them
   * create disconnected constituents. For example,
   * "bread-1 as-2 well-3 as-4 cheese-5" is turned into conj_and(bread, cheese);
   * the leftover dep(well-3, as-2) and dep(well-3, as-4) cannot be attached to
   * the graph, so these dependencies are erased. A brief illustrative sketch
   * follows the method body.
   *
   * @param list List of typed dependencies from which to remove multiword conjunction parts
   */
  private static void eraseMultiConj(Collection<TypedDependency> list) {
// find typed deps of form cc(gov, x)
for (TypedDependency td1 : list) {
if (td1.reln() == COORDINATION) {
IndexedWord x = td1.dep();
// find typed deps of form dep(x,y) and kill them
for (TypedDependency td2 : list) {
if (td2.gov().equals(x) && (td2.reln() == DEPENDENT || td2.reln() == MULTI_WORD_EXPRESSION || td2.reln() == COORDINATION ||
td2.reln() == ADVERBIAL_MODIFIER || td2.reln() == NEGATION_MODIFIER || td2.reln() == AUX_MODIFIER)) {
td2.setReln(KILL);
}
}
}
}
filterKill(list);
}
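  // Illustrative sketch only: runs eraseMultiConj() on the basic dependencies
  // of a hand-written bracketing of "He ate bread as well as cheese." Any
  // dep/mwe/advmod/cc edges hanging off the token that heads the coordination
  // phrase ("well" under the usual analysis) should be killed and filtered
  // out, as described above. The bracketing and the exact shape of the basic
  // dependencies are assumptions made for illustration.
  private static void demoEraseMultiConj() {
    Tree tree = Tree.valueOf("(ROOT (S (NP (PRP He)) (VP (VBD ate) (NP (NP (NN bread)) (CONJP (RB as) (RB well) (IN as)) (NP (NN cheese)))) (. .)))");
    GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);
    List<TypedDependency> deps = new ArrayList<TypedDependency>(gs.typedDependencies());
    eraseMultiConj(deps);
    for (TypedDependency td : deps) {
      System.err.println(td);
    }
  }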
/**
   * Remove duplicate relations, which can arise when collapsing stranded
   * prepositions. E.g., for "What does CPR stand for?" we get dep(stand, what),
   * and after collapsing we also get prep_for(stand, what). A helper sketch
   * illustrating the check follows the method body.
   *
   * @param list A list of typed dependencies to check through
   */
  private static void removeDep(Collection<TypedDependency> list) {
    Set<GrammaticalRelation> prepRels = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
prepRels.addAll(EnglishGrammaticalRelations.getPrepsC());
for (TypedDependency td1 : list) {
if (prepRels.contains(td1.reln())) { // if we have a prep_ relation
IndexedWord gov = td1.gov();
IndexedWord dep = td1.dep();
for (TypedDependency td2 : list) {
if (td2.reln() == DEPENDENT && td2.gov().equals(gov) && td2.dep().equals(dep)) {
td2.setReln(KILL);
}
}
}
}
// now remove typed dependencies with reln "kill"
    for (Iterator<TypedDependency> iter = list.iterator(); iter.hasNext(); ) {
TypedDependency td = iter.next();
if (td.reln() == KILL) {
if (DEBUG) {
System.err.println("Removing duplicate relation: " + td);
}
iter.remove();
}
}
}
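  // Minimal helper sketch (illustration only, not used by the pipeline): counts
  // dep(gov, dep) edges that are shadowed by a collapsed prep_*/prepc_* edge
  // over the same governor/dependent pair, i.e. exactly the duplicates that
  // removeDep() deletes. The list is assumed to hold already-collapsed
  // dependencies.
  private static int countShadowedDeps(Collection<TypedDependency> list) {
    Set<GrammaticalRelation> prepRels = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
    prepRels.addAll(EnglishGrammaticalRelations.getPrepsC());
    int shadowed = 0;
    for (TypedDependency prepTd : list) {
      if (!prepRels.contains(prepTd.reln())) {
        continue;  // only collapsed prepositional relations can shadow a dep edge
      }
      for (TypedDependency depTd : list) {
        if (depTd.reln() == DEPENDENT && depTd.gov().equals(prepTd.gov()) && depTd.dep().equals(prepTd.dep())) {
          shadowed++;
        }
      }
    }
    return shadowed;
  }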
/**
* Find and remove any exact duplicates from a dependency list.
* For example, the method that "corrects" nsubj dependencies can
* turn them into nsubjpass dependencies. If there is some other
* source of nsubjpass dependencies, there may now be multiple
* copies of the nsubjpass dependency. If the containing data type
* is a List, they may both now be in the List.
*/
  private static void removeExactDuplicates(Collection<TypedDependency> list) {
    Set<TypedDependency> set = new TreeSet<TypedDependency>(list);
list.clear();
list.addAll(set);
}
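  // Illustration only: reports whether a dependency list still contains exact
  // duplicates, using the same natural ordering (TypedDependency is Comparable)
  // that removeExactDuplicates() relies on via TreeSet.
  private static boolean hasExactDuplicates(Collection<TypedDependency> list) {
    return new TreeSet<TypedDependency>(list).size() < list.size();
  }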
  public static List<GrammaticalStructure> readCoNLLXGrammaticalStructureCollection(String fileName) throws IOException {
return readCoNLLXGrammaticalStructureCollection(fileName, EnglishGrammaticalRelations.shortNameToGRel, new FromDependenciesFactory());
}
  public static EnglishGrammaticalStructure buildCoNLLXGrammaticalStructure(List<List<String>> tokenFields) {
return (EnglishGrammaticalStructure) buildCoNLLXGrammaticalStructure(tokenFields, EnglishGrammaticalRelations.shortNameToGRel, new FromDependenciesFactory());
}
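  // Usage sketch (the file name below is a placeholder, not a real resource):
  // reads a CoNLL-X formatted treebank, rebuilds grammatical structures from
  // the stored dependencies, and prints their typed dependencies.
  private static void demoReadCoNLLX() throws IOException {
    List<GrammaticalStructure> structures = readCoNLLXGrammaticalStructureCollection("/path/to/treebank.conllx");
    for (GrammaticalStructure gs : structures) {
      System.err.println(gs.typedDependencies());
    }
  }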
  public static class FromDependenciesFactory
    implements GrammaticalStructureFromDependenciesFactory<EnglishGrammaticalStructure> {
@Override
    public EnglishGrammaticalStructure build(List<TypedDependency> tdeps, TreeGraphNode root) {
return new EnglishGrammaticalStructure(tdeps, root);
}
}
} // end class EnglishGrammaticalStructure