All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.trees.EnglishGrammaticalRelations Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
// Stanford Dependencies - Code for producing and using Stanford dependencies.
// Copyright © 2005-2014 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    [email protected]
//    http://nlp.stanford.edu/software/stanford-dependencies.shtml

package edu.stanford.nlp.trees;

import static edu.stanford.nlp.trees.EnglishPatterns.*;

import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Generics;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import static edu.stanford.nlp.trees.GrammaticalRelation.*;


/**
 * EnglishGrammaticalRelations is a
 * set of {@link GrammaticalRelation} objects for the English language.
 * These relations are commonly called Stanford Dependencies (SD).
 * 

* Grammatical relations can either be shown in their basic form, where each * input token receives a relation, or "collapsed" which does certain normalizations * which group words or turn them into relations. See * {@link EnglishGrammaticalStructure}. What is presented here mainly * shows the basic form, though there is some mixture. The "collapsed" grammatical * relations primarily differ as follows: *

    *
  • Some multiword conjunctions and prepositions are treated as single * words, and then processed as below.
  • *
  • Prepositions do not appear as words but are turned into new "prep" or "prepc" * grammatical relations, one for each preposition.
  • *
  • Conjunctions do not appear as words but are turned into new "conj" * grammatical relations, one for each conjunction.
  • *
  • The possessive "'s" is deleted, leaving just the relation between the * possessor and possessum.
  • *
  • Agents of passive sentences are recognized and marked as agent and not as prep_by.
  • *
*

* This set of English grammatical relations is not intended to be * exhaustive or immutable. It's just where we're at now. *

*

* See {@link GrammaticalRelation} for details of fields and matching. *

*

* If using LexicalizedParser, it should be run with the * -retainTmpSubcategories option and one of the * -splitTMP options (e.g., -splitTMP 1) in order to * get the temporal NP dependencies maximally right! *

* Implementation notes: Don't change the set of GRs without discussing it * with people first. If a change is needed, to add a new grammatical relation: *

    *
  • Governor nodes of the grammatical relations should be the lowest ones.
  • *
  • Check the semantic head rules in SemanticHeadFinder and * ModCollinsHeadFinder, both in the trees package. That's what will be used to * match here.
  • *
  • Create and define the GrammaticalRelation similarly to the others.
  • *
  • Add it to the values array at the end of the file.
  • *
* The patterns in this code assume that an NP may be followed by either a * -ADV or -TMP functional tag but there are no other functional tags represented. * This corresponds to what we currently get from NPTmpRetainingTreeNormalizer or * DependencyTreeTransformer. * * @author Bill MacCartney * @author Marie-Catherine de Marneffe * @author Christopher Manning * @author Galen Andrew (refactoring English-specific stuff) * @see GrammaticalStructure * @see GrammaticalRelation * @see EnglishGrammaticalStructure */ public class EnglishGrammaticalRelations { //todo: Things still to fix: comparatives, in order to clauses, automatic Vadas-like NP structure /** This class is just a holder for static classes * that act a bit like an enum. */ private EnglishGrammaticalRelations() {} // By setting the HeadFinder to null, we find out right away at // runtime if we have incorrectly set the HeadFinder for the // dependency tregexes private static final TregexPatternCompiler tregexCompiler = new TregexPatternCompiler((HeadFinder) null); /** * The "predicate" grammatical relation. The predicate of a * clause is the main VP of that clause; the predicate of a * subject is the predicate of the clause to which the subject * belongs.

*

* Example:
* "Reagan died" → pred(Reagan, died) */ public static final GrammaticalRelation PREDICATE = new GrammaticalRelation(Language.English, "pred", "predicate", DEPENDENT, "S|SINV", tregexCompiler, "S|SINV <# VP=target"); /** * The "auxiliary" grammatical relation. An auxiliary of a clause is a * non-main verb of the clause.

*

* Example:
* "Reagan has died" → aux(died, has) */ public static final GrammaticalRelation AUX_MODIFIER = new GrammaticalRelation(Language.English, "aux", "auxiliary", DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler, "VP < VP < (/^(?:TO|MD|VB.*|AUXG?|POS)$/=target)", "SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)", "CONJP < TO=target < VB", // (CONJP not to mention) // add handling of tricky VP fronting cases... "SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))"); /** * The "passive auxiliary" grammatical relation. A passive auxiliary of a * clause is a * non-main verb of the clause which contains the passive information. *

* Example:
* "Kennedy has been killed" → auxpass(killed, been) */ public static final GrammaticalRelation AUX_PASSIVE_MODIFIER = new GrammaticalRelation(Language.English, "auxpass", "passive auxiliary", AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler, "VP < (/^(?:VB|AUX|POS)/=target < " + passiveAuxWordRegex + " ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )", "SQ|SINV < (/^(?:VB|AUX|POS)/=target < " + beAuxiliaryRegex + " $++ (VP < VBD|VBN))", // add handling of tricky VP fronting cases... "SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))", "SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))"); /** * The "copula" grammatical relation. A copula is the relation between * the complement of a copular verb and the copular verb.

*

* Examples:
* "Bill is big" → cop(big, is)
* "Bill is an honest man" → cop(man, is) */ public static final GrammaticalRelation COPULA = new GrammaticalRelation(Language.English, "cop", "copula", AUX_MODIFIER, "VP|SQ|SINV|SBARQ", tregexCompiler, "VP < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (/^(?:ADJP|NP$|WHNP$)/ !< (VBN|VBD !$++ /^N/)) | $++ (S <: (ADJP < JJ)) ] )", "SQ|SINV < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (ADJP !< VBN|VBD) | $++ (NP $++ NP) | $++ (S <: (ADJP < JJ)) ] )", // matches (what, is) in "what is that" after the SQ has been flattened out of the tree "SBARQ < (/^(?:VB|AUX)/=target < " + copularWordRegex + ") < (WHNP < WP)", // "Such a great idea this was" "SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))"); private static final String ETC_PAT = "(FW < /^(?i:etc)$/)"; private static final String ETC_PAT_target = "(FW=target < /^(?i:etc)$/)"; private static final String FW_ETC_PAT = "(ADVP|NP <1 (FW < /^(?i:etc)$/))"; private static final String FW_ETC_PAT_target = "(ADVP|NP=target <1 (FW < /^(?i:etc)$/))"; // match "not", "n't", "nt" (for informal writing), or "never" as _complete_ string private static final String NOT_PAT = "/^(?i:n[o']?t|never)$/"; private static final String WESTERN_SMILEY = "/^(?:[<>]?[:;=8][\\-o\\*']?(?:-RRB-|-LRB-|[DPdpO\\/\\\\\\:}{@\\|\\[\\]])|(?:-RRB-|-LRB-|[DPdpO\\/\\\\\\:}{@\\|\\[\\]])[\\-o\\*']?[:;=8][<>]?)$/"; private static final String ASIAN_SMILEY = "/(?!^--$)^(?:-LRB-)?[\\-\\^x=~<>'][_.]?[\\-\\^x=~<>'](?:-RRB-)?$/"; /** * The "conjunct" grammatical relation. A conjunct is the relation between * two elements connected by a conjunction word. We treat conjunctions * asymmetrically: The head of the relation is the first conjunct and other * conjunctions depend on it via the conj relation.

*

* Example:
* "Bill is big and honest" → conj(big, honest) *

* Note:Modified in 2010 to exclude the case of a CC/CONJP first in its phrase: it has to conjoin things. */ public static final GrammaticalRelation CONJUNCT = new GrammaticalRelation(Language.English, "conj", "conjunct", DEPENDENT, "VP|(?:WH)?NP(?:-TMP|-ADV)?|ADJP|PP|QP|ADVP|UCP(?:-TMP|-ADV)?|S|NX|SBAR|SBARQ|SINV|SQ|JJP|NML|RRC", tregexCompiler, "VP|S|SBAR|SBARQ|SINV|SQ|RRC < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/ $+ !/^(?:SBAR|PRN|``|''|-[LR]RB-|,|:|\\.)$/=target)", // This case is separated out from the previous case to // avoid conflicts with advcl when you have phrases such as // "but only because ..." "SBAR < (CC|CONJP $-- @SBAR $+ @SBAR=target)", // non-parenthetical or comma in suitable phrase with conj then adverb to left "VP|S|SBAR|SBARQ|SINV|SQ|RRC < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/ $+ (ADVP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target))", // content phrase to the right of a comma or a parenthetical // The test at the end is to make sure that a conjunction or // comma etc actually show up between the target of the conj // dependency and the head of the phrase. Otherwise, a // different relationship is probably more appropriate. // Note that this test looks for one of two things: a // cc/conjp which does not have a , between it and the // target or a , which does not appear to the right of a // cc/conjp. This test eliminates things such as // parenthetics which come after a list, such as in the // sentence "to see the market go down and dump everything, // which ..." where "go down and dump everything, which..." // is all in one VP node. 
"VP|S|SBAR|SBARQ|SINV|SQ=root < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/) < (/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/ $+ (/^S|SINV$|^(?:A|N|V|PP|PRP|J|W|R)/=target [$-- (CC|CONJP $-- (__ ># =root) !$++ (/^:|,$/ $++ =target)) | $-- (/^:|,$/ $-- (__ ># =root) [!$-- /^CC|CONJP$/ | $++ (=target < (/^,$/ $++ (__ ># =target)))])] ) )", // non-parenthetical or comma in suitable phrase with conjunction to left "/^(?:ADJP|JJP|PP|QP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/ $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target) | < " + ETC_PAT_target + " | < " + FW_ETC_PAT_target + "]", // non-parenthetical or comma in suitable phrase with conj then adverb to left "/^(?:ADJP|PP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/ $+ (ADVP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target))", // content phrase to the right of a comma or a parenthetical "/^(?:ADJP|PP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/) | < " + ETC_PAT + " | < " + FW_ETC_PAT + "] < (/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/ [ $+ /^S|SINV$|^(?:A|N|V|PP|PRP|J|W|R)/=target | $+ " + ETC_PAT_target + " ] )", // content phrase to the left of a comma for at least NX "NX|NML [ < (CC|CONJP $- __) | < " + ETC_PAT + "] < (/^,$/ $- /^(?:A|N|V|PP|PRP|J|W|R|S)/=target)", // to take the conjunct in a preconjunct structure "either X or Y" // also catches some missing examples of etc as conj "/^(?:VP|S|SBAR|SBARQ|SINV|ADJP|PP|QP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC $++ (CC|CONJP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target)) | <- " + ETC_PAT_target + " | <- " + FW_ETC_PAT_target + " ]"); /** * The "coordination" grammatical relation. A coordination is the relation * between an element and a conjunction. *

* Example:
* "Bill is big and honest." → cc(big, and) */ public static final GrammaticalRelation COORDINATION = new GrammaticalRelation(Language.English, "cc", "coordination", DEPENDENT, ".*", tregexCompiler, "__ [ < (CC=target !< /^(?i:either|neither|both)$/ ) | < (CONJP=target !< (RB < /^(?i:not)$/ $+ (RB|JJ < /^(?i:only|just|merely)$/))) ]"); /** * The "punctuation" grammatical relation. This is used for any piece of * punctuation in a clause, if punctuation is being retained in the * typed dependencies. *

* Example:
* "Go home!" → punct(Go, !) *

* The condition for NFP to appear here is that it does not match the emoticon patterns under discourse. */ public static final GrammaticalRelation PUNCTUATION = new GrammaticalRelation(Language.English, "punct", "punctuation", DEPENDENT, ".*", tregexCompiler, "__ < /^(?:\\.|:|,|''|``|\\*|-LRB-|-RRB-|HYPH)$/=target", "__ < (NFP=target !< " + WESTERN_SMILEY + " !< " + ASIAN_SMILEY + ")"); /** * The "argument" grammatical relation. An argument of a VP is a * subject or complement of that VP; an argument of a clause is * an argument of the VP which is the predicate of that * clause.

*

* Example:
* "Clinton defeated Dole" → arg(defeated, Clinton), arg(defeated, Dole) */ public static final GrammaticalRelation ARGUMENT = new GrammaticalRelation(Language.English, "arg", "argument", DEPENDENT); /** * The "subject" grammatical relation. The subject of a VP is * the noun or clause that performs or experiences the VP; the * subject of a clause is the subject of the VP which is the * predicate of that clause.

*

* Examples:
* "Clinton defeated Dole" → subj(defeated, Clinton)
* "What she said is untrue" → subj(is, What she said) */ public static final GrammaticalRelation SUBJECT = new GrammaticalRelation(Language.English, "subj", "subject", ARGUMENT); /** * The "nominal subject" grammatical relation. A nominal subject is * a subject which is a noun phrase.

*

* Example:
* "Clinton defeated Dole" → nsubj(defeated, Clinton) */ public static final GrammaticalRelation NOMINAL_SUBJECT = new GrammaticalRelation(Language.English, "nsubj", "nominal subject", SUBJECT, "S|SQ|SBARQ|SINV|SBAR|PRN", tregexCompiler, "S=subj < ((NP|WHNP=target !< EX !<# (/^NN/ < (" + timeWordRegex + "))) $++ VP=verb) : (=subj !> VP | !<< (=verb < TO))", "S < ( NP=target <# (/^NN/ < " + timeWordRegex + ") !$++ NP $++VP)", "SQ|PRN < (NP=target !< EX $++ VP)", "SQ < (NP=target !< EX $- (/^(?:VB|AUX)/ < " + copularWordRegex + ") !$++ VP)", // Allows us to match "Does it?" without matching "Who does it?" "SQ < (NP=target !< EX $- /^(?:VB|AUX)/ !$++ VP) !$-- NP|WHNP", "SQ < ((NP=target !< EX) $- (RB $- /^(?:VB|AUX)/) ![$++ VP])", "SBARQ < WHNP=target < (SQ < (VP !$-- NP))", // This will capture incorrectly parsed trees in sentences // such as "What disease causes cancer" without capturing // correctly parsed trees such as "What do elephants eat?" "SBARQ < WHNP=target < (SQ < ((/^(?:VB)/ !< " + copularWordRegex + ") !$-- NP !$++ VP))", "SBARQ < (SQ=target < (/^(?:VB|AUX)/ < " + copularWordRegex + ") !< VP)", // matches subj in SINV "SINV < (NP|WHNP=target [ $- VP|VBZ|VBD|VBP|VB|MD|AUX | $- (@RB|ADVP $- VP|VBZ|VBD|VBP|VB|MD|AUX) | !$- __ !$ @NP] )", // Another SINV subj, such as "Such a great idea this was" "SINV < (NP $++ (NP=target $++ (VP < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))", //matches subj in xcomp like "He considered him a friend" "S < (NP=target $+ NP|ADJP) > VP", // matches subj in relative clauses "SBAR < WHNP=target [ < (S < (VP !$-- NP) !< SBAR) | < (VP !$-- NP) !< S ]", // second disjunct matches errors where there is no S under SBAR and otherwise does no harm // matches subj in relative clauses "SBAR !< WHNP < (S !< (NP $++ VP)) > (VP > (S $- WHNP=target))", // matches subj in existential "there" SQ "SQ < ((NP < EX) $++ NP=target)", // matches subj in existential "there" S "S < (NP < EX) <+(VP) (VP < NP=target)", // matches (what, that) in "what 
is that" after the SQ has been flattened out of the tree "SBARQ < (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (WHNP < WP) < NP=target", // matches (what, wrong) in "what is wrong with ..." after the SQ has been flattened out of the tree // note that in that case "wrong" is taken as the head thanks to SemanticHeadFinder hackery // The !$++ matches against (what, worth) in What is UAL stock worth? "SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ ADJP=adj !$++ (NP $++ =adj)))", // the (NP < EX) matches (is, WHNP) in "what dignity is there in ..." // the PP matches (is, WHNP) in "what is on the test" "SBARQ <1 WHNP=target < (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + ") [< (NP < EX) | < PP])"); /** * The "nominal passive subject" grammatical relation. A nominal passive * subject is a subject of a passive which is a noun phrase.

*

* Example:
* "Dole was defeated by Clinton" → nsubjpass(defeated, Dole) *

* This pattern recognizes basic (non-coordinated) examples. The coordinated * examples are currently handled by correctDependencies() in * EnglishGrammaticalStructure. This seemed more accurate than any tregex * expression we could come up with. */ public static final GrammaticalRelation NOMINAL_PASSIVE_SUBJECT = new GrammaticalRelation(Language.English, "nsubjpass", "nominal passive subject", NOMINAL_SUBJECT, "S|SQ", tregexCompiler, "S|SQ < (WHNP|NP=target !< EX) < (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + ") < (VP < VBN|VBD))"); /** * The "clausal subject" grammatical relation. A clausal subject is * a subject which is a clause.

*

* Examples: (subject is "what she said" in both examples)
* "What she said makes sense" → csubj(makes, said)
* "What she said is untrue" → csubj(untrue, said) */ public static final GrammaticalRelation CLAUSAL_SUBJECT = new GrammaticalRelation(Language.English, "csubj", "clausal subject", SUBJECT, "S", tregexCompiler, "S < (SBAR|S=target !$+ /^,$/ $++ (VP !$-- NP))"); /** * The "clausal passive subject" grammatical relation. A clausal passive subject is * a subject of a passive verb which is a clause.

*

* Example: (subject is "that she lied")
* "That she lied was suspected by everyone" → csubjpass(suspected, lied) */ public static final GrammaticalRelation CLAUSAL_PASSIVE_SUBJECT = new GrammaticalRelation(Language.English, "csubjpass", "clausal passive subject", CLAUSAL_SUBJECT, "S", tregexCompiler, "S < (SBAR|S=target !$+ /^,$/ $++ (VP < (VP < VBN|VBD) < (/^(?:VB|AUXG?)/ < " + passiveAuxWordRegex + ") !$-- NP))", "S < (SBAR|S=target !$+ /^,$/ $++ (VP <+(VP) (VP < VBN|VBD > (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + "))) !$-- NP))"); /** * The "complement" grammatical relation. A complement of a VP * is any object (direct or indirect) of that VP, or a clause or * adjectival phrase which functions like an object; a complement * of a clause is a complement of the VP which is the predicate * of that clause.

*

* Examples:
* "She gave me a raise" → * comp(gave, me), * comp(gave, a raise)
* "I like to swim" → * comp(like, to swim) */ public static final GrammaticalRelation COMPLEMENT = new GrammaticalRelation(Language.English, "comp", "complement", ARGUMENT); /** * The "object" grammatical relation. An object of a VP * is any direct object or indirect object of that VP; an object * of a clause is an object of the VP which is the predicate * of that clause.

*

* Examples:
* "She gave me a raise" → * obj(gave, me), * obj(gave, raise) */ public static final GrammaticalRelation OBJECT = new GrammaticalRelation(Language.English, "obj", "object", COMPLEMENT); /** * The "direct object" grammatical relation. The direct object * of a verb is the noun phrase which is the (accusative) object of * the verb; the direct object of a clause or VP is the direct object of * the head predicate of that clause.

*

* Example:
* "She gave me a raise" → * dobj(gave, raise)

* Note that dobj can also be assigned by the conversion of rel in the postprocessing. */ public static final GrammaticalRelation DIRECT_OBJECT = new GrammaticalRelation(Language.English, "dobj", "direct object", OBJECT, "VP|SQ|SBARQ?", tregexCompiler, "VP !< (/^(?:VB|AUX)/ [ < " + copularWordRegex + " | < " + clausalComplementRegex + " ]) < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " + // The next qualification eliminates parentheticals that // come after the actual dobj " <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ", // Examples such as "Rolls-Royce expects sales to remain steady" "VP < (S < (NP|WHNP=target $++ (VP < TO)))", // This matches rare cases of misparses, such as "What // disease causes cancer?" where the "causes" does not get a // surrounding VP. Hopefully it does so without overlapping // any other dependencies. "SQ < (/^(?:VB)/=verb !< " + copularWordRegex + ") $-- WHNP !< VP !< (/^(?:VB)/ ! == =verb) < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] )", // The rule for Wh-questions // cdm Jul 2010: No longer require WHNP as first child of SBARQ below: often not because of adverbials, quotes, etc., and removing restriction does no harm // this next pattern used to assume no empty NPs. Corrected. // One could require the VP at the end of the <+ to also be !< (/^(?:VB|AUX)/ $. SBAR) . This would be right for complement SBAR, but often avoids good matches for adverbial SBAR. Adding it kills 4 good matches for avoiding 2 wrong matches on sum of TB3-train and EWT "SBARQ < (WHNP=target !< WRB !<# (/^NN/ < " + timeWordRegex + ")) <+(SQ|SINV|S|VP) (VP !< NP|TO !< (S < (VP < TO)) !< (/^(?:VB|AUX)/ < " + copularWordRegex + " $++ (VP < VBN|VBD)) !< (PP <: IN|TO) $-- (NP !< /^-NONE-$/))", // matches direct object in relative clauses with relative pronoun "I saw the book that you bought". Seems okay. 
If this is changed, also change the pattern for "rel" // TODO: this can occasionally produce incorrect dependencies, such as the sentence // "with the way which his split-fingered fastball is behaving" // eg take a tree where the verb doesn't have an object "SBAR < (WHNP=target !< WRB) < (S < NP < (VP !< SBAR !<+(VP) (PP <- IN|TO) !< (S < (VP < TO))))", // // matches direct object for long dependencies in relative clause without explicit relative pronouns // "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !$-- CC $-- NP > NP=target " + // // avoid conflicts with rcmod. TODO: we could look for // // empty nodes in this kind of structure and use that to // // find dobj, tmod, advmod, etc. won't help the parser, // // of course, but will help when converting a treebank // // which contains empties // // Example: "with the way his split-fingered fastball is behaving" // "!($-- @NP|WHNP|NML > @NP|WHNP <: (S !< (VP < TO)))", // If there was an NP between the WHNP and the ADJP, we want // that NP to have the nsubj relation, and the WHNP is either // a dobj or a pobj instead. For example, dobj(What, worth) // in "What is UAL stock worth?" "SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ (ADJP=adj !< (PP !< NP)) $++ (NP $++ =adj)))" // Now allow $++ in main pattern above so don't need this. // "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $+ (ADVP $+ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO))))) !$-- CC $-- NP > NP=target" // Excluding BE doesn't allow cases of NP-PRD followed by NP-TMP or NP-LOC like "These are Europeans next door." // Doc said: case with an iobj before dobj as two regular NPs. (This won't match if second one is explicitly NP-TMP.) 
But basic case covers this case. Does nothing. // "VP < (NP $+ (NP|WHNP=target !< (/^NN/ < " + timeWordLotRegex + "))) !<(/^(?:VB|AUX)/ < " + copularWordRegex + ")", // this time one also included "lot" // Doc said: match "give it next week". CDM 2013: I think this was put in to handle parse errors where the 2 NPs of a ditransitive were grouped into 1. But it is in principle wrong, and including it seems to be a no-op on TB3 WSJ. So exclude for now. // "VP < (NP < (NP $+ (/^(NP|WHNP)$/=target !< (/^NN/ < " + timeWordLotRegex + "))))!< (/^(?:VB|AUX)/ < " + copularWordRegex + ")", // this time one also included "lot" // Doc said: matches direct object in relative clauses "I saw the book that you said you bought". But it didn't seem to determine anything. // This was various attempts at handling a long distance dependency, but that doesn't work; now handled through rel mechanism. // "SBAR !< WHNP|WHADVP < (S < (@NP $++ (VP !$++ NP))) > (VP > (S < NP $- WHNP=target))", // "SBAR !< WHNP|WHADVP|IN < (S < @NP < (VP !< (NP !<<# " + timeWordRegex + "))) > (VP > (S < NP $- WHNP=target))", // "S < (@NP !< /^-NONE-$/) <+(VP) (VP !< (@NP !< /^-NONE-$/ < (/^VB/ !< " + copularWordRegex + ")) !< CONJP|CC|SBAR) > (@SBAR !< @WHNP|WHADVP $- /^VB/ >+(VP|S|SBAR) (S < (@NP !< /^-NONE-$/ !<<# " + timeWordRegex + ") $- (@WHNP=target !< /^-NONE-$/ !<# WRB)))", // we now don't match "VBG > PP $+ NP=target", since it seems better to CM to regard these quasi preposition uses (like "including soya") as prepositions rather than verbs with objects -- that's certainly what the phrase structure at least suggests in the PTB. They're now matched as pobj ); /** * The "indirect object" grammatical relation. The indirect * object of a VP is the noun phrase which is the (dative) object * of the verb; the indirect object of a clause is the indirect * object of the VP which is the predicate of that clause. *

* Example:
* "She gave me a raise" → * iobj(gave, me) */ public static final GrammaticalRelation INDIRECT_OBJECT = new GrammaticalRelation(Language.English, "iobj", "indirect object", OBJECT, "VP", tregexCompiler, "VP < (NP=target !< /\\$/ !<# (/^NN/ < " + timeWordRegex + ") $+ (NP !<# (/^NN/ < " + timeWordRegex + ")))", // this next one was meant to fix common mistakes of our parser, but is perhaps too dangerous to keep // excluding selfRegex leaves out phrases such as "I cooked dinner myself" // excluding DT leaves out phrases such as "My dog ate it all"" "VP < (NP=target < (NP !< /\\$/ $++ (NP !<: (PRP < " + selfRegex + ") !<: DT !< (/^NN/ < " + timeWordLotRegex + ")) !$ CC|CONJP !$ /^,$/ !$++ /^:$/))"); /** * The "prepositional object" grammatical relation. The object of a * preposition is the head of a noun phrase following the preposition, or * the adverbs "here" and "there". * (The preposition in turn may be modifying a noun, verb, etc.) * We here define cases of VBG quasi-prepositions like "including", * "concerning", etc. as instances of pobj (unlike the Penn Treebank). *

* Example:
* "I sat on the chair" → * pobj(on, chair) *

* (The preposition can be called a FW for pace, versus, etc. It can also * be called a CC - but we don't currently handle that and would need to * distinguish from conjoined PPs. Jan 2010 update: We now insist that the * NP must follow the preposition. This prevents a preceding NP measure * phrase being matched as a pobj. We do allow a preposition tagged RB * followed by an NP pobj, as happens in the Penn Treebank for adverbial uses * of PP like "up 19%") */ public static final GrammaticalRelation PREPOSITIONAL_OBJECT = new GrammaticalRelation(Language.English, "pobj", "prepositional object", OBJECT, "SBARQ|PP(?:-TMP)?|WHPP|PRT|ADVP|WHADVP|XS", tregexCompiler, "/^(?:PP(?:-TMP)?|(?:WH)?(?:PP|ADVP))$/ < (SYM|IN|VBG|VBN|TO|FW|RB|RBR $++ (/^(?:WH)?(?:NP|ADJP)(?:-TMP|-ADV)?$/=target !$- @NP) !< /^(?i:not)$/)", // We allow ADVP with NP objects for cases like (ADVP earlier this year) "/^PP(?:-TMP)?$/ < (/^(?:IN|VBG|VBN|TO)$/ $+ (ADVP=target [ < (RB < /^(?i:here|there)$/) | < (ADVP < /^NP(?:-TMP)?$/) ] ))", // second disjunct is weird ADVP, only matches 1 tree in 2-21 // to deal with preposition stranding in questions (e.g., "Which city do you live in?") -- the preposition is sometimes treated as a particle by the parser (works well but doesn't preserve the tree structure!) 
"PRT >- (VP !< (S < (VP < TO)) >+(SQ|SINV|S|VP) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/))", "(PP <: IN|TO) >- (VP !< (S < (VP < TO)) >+(SQ|SINV|S|VP) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/))", "(PP <: IN|TO) $- (NP $-- (VBZ|VBD) !$++ VP) >+(SQ) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/)", "XS|ADVP < (IN < /^(?i:at)$/) < JJS|DT=target", // at least, at most, at best, at worst, at all //"PP < (CC < less) < NP", "@PP < CC < @NP=target !< @IN|TO|VBG|VBN|RB|RP|PP", // for cases where "preposition" like "plus", "but", or "versus" // to handle "in and out of government" "@WHPP|PP < (@WHPP|PP $++ (CC|CONJP $++ (@WHPP|PP $+ (NP=target !$+ __))))", // to handle "What weapon is Apollo most proficient with?" "SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ (ADJP=adj < (PP !< NP)) $++ (NP $++ =adj)))"); /** * The "prepositional complement" grammatical relation. * This is used when the complement of a preposition is a clause or * an adverbial or prepositional phrase. * The prepositional complement of * a preposition is the head of the sentence following the preposition, * or the preposition head of the PP. *

* Examples:
* "We have no useful information on whether users are at risk" → * pcomp(on, are)
* "They heard about you missing classes." → * pcomp(about, missing)
* "It is warmer in Greece than in Italy" → * pcomp(than, in) */ public static final GrammaticalRelation PREPOSITIONAL_COMPLEMENT = new GrammaticalRelation(Language.English, "pcomp", "prepositional complement", COMPLEMENT, "(?:WH)?PP(?:-TMP)?", tregexCompiler, "@PP|WHPP < (IN|VBG|VBN|TO $+ @SBAR|S|PP|ADVP=target)", // no intervening NP; VBN is for "compared with" "@PP|WHPP < (RB $+ @SBAR|S=target)", // RB is for weird tagging like "after/RB adjusting for inflation" "@PP|WHPP !< IN|TO < (SBAR=target <, (IN $+ S))"); // /** // * The "attributive" grammatical relation. The attributive is the complement of a // * verb such as "to be, to seem, to appear". // *

//   * These mainly occur in questions. Arguably they shouldn't and we should treat the question
//   * WHNP and WHADJP as predicates (as we do for ADJP and NP complements (NP-PRD and ADJP-PRD)),
//   * but we at present don't produce this.
//   */
//  public static final GrammaticalRelation ATTRIBUTIVE =
//    new GrammaticalRelation(Language.English, "attr", "attributive",
//        COMPLEMENT, "VP|SBARQ|SQ", tregexCompiler,
//        new String[] {
//          "VP < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + ") !$ (NP < EX)",
//          // "What is that?"
//          "SBARQ < (WHNP|WHADJP=target $+ (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + " !$++ VP) !< (VP <- (PP <:IN)) !<- (PP <: IN)))",
//          "SBARQ < (WHNP|WHADJP=target !< WRB) <+(SQ|SINV|S|VP) (VP !< (S < (VP < TO)) < (/^(?:VB|AUX)/ < " + copularWordRegex + " $++ (VP < VBN|VBD)) !<- PRT !<- (PP <: IN) $-- (NP !< /^-NONE-$/))",
//          // "Is he the man?"
//          "SQ <, (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (NP=target $-- (NP !< EX))"
//        });

  /**
   * The "clausal complement" grammatical relation. A clausal complement of
   * a verb or adjective is a dependent clause with an internal subject which
   * functions like an object of the verb, or adjective. Clausal complements
   * for nouns are limited to complement clauses with a subset of nouns
   * like "fact" or "report". We analyze them the same (parallel to the
   * analysis of this class as "content clauses" in Huddleston and Pullum 2002).
   * Clausal complements are usually finite (though there
   * are occasional exceptions including remnant English subjunctives, and we
   * also classify the complement of causative "have" (She had him arrested)
   * in this category).

*

* Example:
* "He says that you like to swim" → * ccomp(says, like)
* "I am certain that he did it" → * ccomp(certain, did)
* "I admire the fact that you are honest" → * ccomp(fact, honest) */ public static final GrammaticalRelation CLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.English, "ccomp", "clausal complement", COMPLEMENT, "VP|SINV|S|ADJP|ADVP|NP(?:-.*)?", tregexCompiler, // Weird case of verbs with direct S complement that is not an infinitive or participle // ("I saw [him take the cake].", "making [him go crazy]") "VP < (S=target < (VP !<, TO|VBG|VBN) !$-- NP)", // the canonical case of a SBAR[that] with an overt "that" or "whether" "VP < (SBAR=target < (S <+(S) VP) <, (IN|DT < /^(?i:that|whether)$/))", // Conjoined SBAR otherwise in the canonical case "VP < (SBAR=target < (SBAR < (S <+(S) VP) <, (IN|DT < /^(?i:that|whether)$/)) < CC|CONJP)", // This finds most ccomp SBAR[that] with omission of that, but only ones without dobj "VP < (SBAR=target < (S < VP) !$-- NP !<, (IN|WHADVP) !<2 (IN|WHADVP $- ADVP|RB))", // Find ccomp SBAR[that] after dobj for clear marker verbs "VP < (/^V/ < " + ccompObjVerbRegex + ") < (SBAR=target < (S < VP) $-- NP !<, (IN|WHADVP) !<2 (IN|WHADVP $- ADVP|RB))", "VP < (SBAR=target < (S < VP) !$-- NP <, (WHADVP < (WRB < /^(?i:how)$/)))", "VP < @SBARQ=target", // Direct question: She asked "Who is in trouble" "VP < (/^VB/ < " + haveRegex + ") < (S=target < @NP < VP)", // !$-- @SBAR|S handles cases where the answer to the question // "What do they ccompVerb?" // is already answered by a different node // the ccompObjVerbRegex/NP test distinguishes "He told me why ..." // vs "They know my order when ..." "VP < (@SBAR=target !$-- @SBAR|S !$-- /^:$/ [ == @SBAR=sbar | <# @SBAR=sbar ] ) < (/^V/ < " + ccompVerbRegex + ") [ < (/^V/ < " + ccompObjVerbRegex + ") | < (=target !$-- NP) ] : (=sbar < (WHADVP|WHNP < (WRB !< /^(?i:how)$/) !$-- /^(?!RB|ADVP).*$/) !< (S < (VP < TO)))", // to find "...", he said or "...?" he asked. 
// We eliminate conflicts with conj by looking for CC // Matching against "!< (VP < TO|VBG|VBN)" matches against vmod // "!< (VP <1 (VP [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ])))" also matches against vmod "@S|SINV < (@S|SBARQ=target $+ /^(,|\\.|'')$/ !$- /^(?:CC|CONJP|:)$/ !$- (/^(?:,)$/ $- CC|CONJP) !< (VP < TO|VBG|VBN) !< (VP <1 (VP [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ]))) !< (@S !== =target $++ =target !$++ @CC|CONJP)", // ADVP is things like "As long as they spend ..." // < WHNP captures phrases such as "no matter what", "no matter how", etc "ADVP < (SBAR=target [ < WHNP | ( < (IN < /^(?i:as|that)/) < (S < (VP !< TO))) ])", "ADJP < (SBAR=target !< (IN < as) < S)", // ADJP is things like "sure (that) he'll lose" or for/to ones or object of comparison with than "than we were led to expect"; Leave aside as in "as clever as we thought. // That ... he know "S <, (SBAR=target <, (IN < /^(?i:that|whether)$/) !$+ VP)", // JJ catches a couple of funny NPs with heads like "enough" // Note that we eliminate SBAR which also match an vmod pattern "@NP < JJ|NN|NNS < (SBAR=target [ !<(S < (VP < TO )) | !$-- NP|NN|NNP|NNS ] )"); /** * An open clausal complement (xcomp) of a VP or an ADJP is a clausal * complement without its own subject, whose reference is determined by an * external subject. These complements are always non-finite. * The name xcomp is borrowed from Lexical-Functional Grammar. * (Mainly "TO-clause" are recognized, but also some VBG like "stop eating") *

*

* Examples:
* "I like to swim" → * xcomp(like, swim)
* "I am ready to leave" → * xcomp(ready, leave) */ public static final GrammaticalRelation XCLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.English, "xcomp", "xclausal complement", COMPLEMENT, "VP|ADJP|SINV", tregexCompiler, "VP < (S=target [ !$-- NP | $-- (/^V/ < " + xcompVerbRegex + ") ] !$- (NN < order) < (VP < TO))", // used to have !> (VP < (VB|AUX < be)) "ADJP < (S=target <, (VP <, TO))", "VP < (S=target !$- (NN < order) < (NP $+ NP|ADJP))", // to find "help sustain ... "VP <# (/^(?:VB|AUX)/ $+ (VP=target < VB|VBG))", "VP < (SBAR=target < (S !$- (NN < order) < (VP < TO))) !> (VP < (VB|AUX < be)) ", "VP < (S=target !$- (NN < order) <: NP) > VP", "VP < (/^VB/ $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S))) $-- (/^VB/ < " + copularWordRegex + " )", // stop eating // note that we eliminate parentheticals and clauses that could match a vmod // the clause !$-- VBG eliminates matches such as "What are you wearing dancing tonight" "(VP < (S=target < (VP < VBG ) !< NP !$- (/^,$/ [$- @NP|VP | $- (@PP $-- @NP ) |$- (@ADVP $-- @NP)]) !$-- /^:$/ !$-- VBG))", // Detects xcomp(becoming, requirement) in "Hand-holding is becoming an investment banking job requirement" // Also, xcomp(becoming, problem) in "Why is Dave becoming a problem?" "(VP $-- (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (/^VB/ < " + clausalComplementRegex + ") < NP=target)", "VP < (/^(?:VB|AUX)/ < " + clausalComplementRegex + ") < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " + // The next qualification eliminates parentheticals that // come after the actual dobj " <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ", // The old attr relation, used here to recover xcomp relations instead. 
"VP=vp < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + " >># =vp) !$ (NP < EX)", // "Such a great idea this was" if "was" is the root, eg -makeCopulaHead "SINV <# (VP < (/^(?:VB|AUX)/ < " + copularWordRegex + ") $-- (NP $-- NP=target))"); /** * The RELATIVE grammatical relation is only here as a temporary * relation. This tregex triggering indicates either a dobj or a * pobj should be here. We figure this out in a post-processing * step by looking at the surrounding dependencies. */ public static final GrammaticalRelation RELATIVE = new GrammaticalRelation(Language.English, "rel", "relative", COMPLEMENT, "SBAR", tregexCompiler, "SBAR < (WHNP=target !< WRB) < (S < NP < (VP [ < SBAR | <+(VP) (PP <- IN|TO) | < (S < (VP < TO)) ] ))"); /** * The "referent" grammatical relation. A * referent of the Wh-word of a NP is the relative word introducing the relative clause modifying the NP. *

* Example:
* "I saw the book which you bought" → * ref(book, which)
* "I saw the book the cover of which you designed" →
   * ref(book, which)
   */
  public static final GrammaticalRelation REFERENT =
    new GrammaticalRelation(
        Language.English,
        "ref",
        "referent",
        DEPENDENT);  // no source/target patterns are supplied for this relation

  /**
   * The "expletive" grammatical relation.
   * This relation captures an existential there.
   *

*

* Example:
* "There is a statue in the corner" →
   * expl(is, there)
   */
  public static final GrammaticalRelation EXPLETIVE =
    new GrammaticalRelation(
        Language.English,
        "expl",
        "expletive",
        DEPENDENT,
        "S|SQ|SINV",
        tregexCompiler,
        // An NP child that reaches an EX tag through a chain of NPs.
        "S|SQ|SINV < (NP=target <+(NP) EX)");

  /**
   * The "adjectival complement" grammatical relation. An
   * adjectival complement of a VP is an adjectival phrase which
   * functions as the complement (like an object of the verb); an adjectival
   * complement of a clause is the adjectival complement of the VP which is
   * the predicate of that clause.

*

* Example:
* "She looks very beautiful" →
   * acomp(looks, beautiful)
   */
  public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT =
    new GrammaticalRelation(
        Language.English,
        "acomp",
        "adjectival complement",
        COMPLEMENT,
        "VP",
        tregexCompiler,
        // Single pattern: either an ADJP child of the VP, or a VP/S following a verb
        // as long as no copular verb precedes this VP.
        "VP [ < ADJP=target | ( < (/^VB/ [ ( < " + clausalComplementRegex + " $++ VP=target ) | $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S)) ] ) !$-- (/^VB/ < " + copularWordRegex + " )) ]");

  /**
   * The "modifier" grammatical relation. A modifier of a VP is
   * any constituent that serves to modify the meaning of the VP
   * (but is not an ARGUMENT of that
   * VP); a modifier of a clause is a modifier of the VP which is
   * the predicate of that clause.

*

* Examples:
* "Last night, I swam in the pool" →
   * mod(swam, in the pool),
   * mod(swam, last night)
   */
  public static final GrammaticalRelation MODIFIER =
    new GrammaticalRelation(
        Language.English,
        "mod",
        "modifier",
        DEPENDENT);  // used as the parent argument of the more specific modifier relations

  /**
   * The "adverbial clause modifier" grammatical relation. An adverbial clause
   * modifier of some predicates, such as a VP or (inverted) sentence is a clause modifying the verb
   * (temporal clauses, consequences, conditional clauses, etc.).
   *

* Examples:
* "The accident happened as the night was falling" → * advcl(happened, falling)
* "If you know who did it, you should tell the teacher" → * advcl(tell, know) */ public static final GrammaticalRelation ADV_CLAUSE_MODIFIER = new GrammaticalRelation(Language.English, "advcl", "adverbial clause modifier", MODIFIER, "VP|S|SQ|SINV|SBARQ|NP|ADVP", tregexCompiler, "VP < (@SBAR=target <= (@SBAR [ < (IN !< /^(?i:that|whether)$/) | <: (SINV <1 /^(?:VB|MD|AUX)/) | < (RB|IN < so|now) < (IN < that) | <1 (ADVP < (RB < now)) <2 (IN < that) ] ))", "S|SQ|SINV < (SBAR|SBAR-TMP=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$-- /^(?!CC|CONJP|``|,|INTJ|PP(-.*)?).*$/ !$+ VP)", // to get "rather than" "S|SQ|SINV < (SBAR|SBAR-TMP=target <2 (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$-- /^(?!CC|CONJP|``|,|INTJ|PP(-.*)?$).*$/)", // this one might just be better, but at any rate license one with quotation marks or a conjunction beforehand "S|SQ|SINV < (SBAR|SBAR-TMP=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$+ @VP $+ /^,$/ $++ @NP)", // the last part should probably only be @SQ, but this captures some strays at no cost "SBARQ < (SBAR|SBAR-TMP|SBAR-ADV=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) $+ /^,$/ $++ @SQ|S|SBARQ)", // added the (S < (VP (VP < (VB|AUX < be)) < (S=target !$- /^,$/ < (VP < TO|VBG) !$-- NP)", // part of former purpcl [cdm 2010: this pattern was added by me in 2006, but it is just bad!] // // matches direct object for long dependencies in relative clause without explicit relative pronouns // "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !$-- CC $-- NP > NP=target " + // // avoid conflicts with rcmod. TODO: we could look for // // empty nodes in this kind of structure and use that to // // find dobj, tmod, advmod, etc. 
won't help the parser, // // of course, but will help when converting a treebank // // which contains empties // // Example: "with the way his split-fingered fastball is behaving" // "!($-- @NP|WHNP|NML > @NP|WHNP <: (S !< (VP < TO)))", "NP < (NP $++ (SBAR=target < (IN < /^(?i:than)$/) !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !<: (S !< (VP < TO))) !$++ (CC $++ =target))", // this is for comparative or as ... as complements: sold more quickly [than they had expected] // available as long [as they install a crash barrier] "ADVP < ADVP < SBAR=target" ); /* * The "purpose clause modifier" grammatical relation has been discontinued * It is now just seen as a special case of an advcl. A purpose clause * modifier of a VP is a clause headed by "(in order) to" specifying a * purpose. Note: at present we only recognize ones that have * "in order to" or are fronted. Otherwise we can't use our surface representations to * distinguish these from xcomp's. We can also recognize "to" clauses * introduced by "be VBN". *

* Example:
* "He talked to the president in order to secure the account" → * purpcl(talked, secure) */ /** * The "relative clause modifier" grammatical relation. A relative clause * modifier of an NP is a relative clause modifying the NP. The link * points from the head noun of the NP to the head of the relative clause, * normally a verb. *

*

* Examples:
* "I saw the man you love" → * rcmod(man, love)
* "I saw the book which you bought" → * rcmod(book, bought) */ public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.English, "rcmod", "relative clause modifier", MODIFIER, "(?:WH)?(?:NP|NML|ADVP)(?:-.*)?", tregexCompiler, "@NP|WHNP|NML=np $++ (SBAR=target [ <+(SBAR) WHPP|WHNP | <: (S !< (VP < TO)) ]) !$-- @NP|WHNP|NML !$++ " + ETC_PAT + " !$++ " + FW_ETC_PAT + " > @NP|WHNP : (=np !$++ (CC|CONJP $++ =target))", "NP|NML $++ (SBAR=target < (WHADVP < (WRB @NP", // for case of relative clauses with no relativizer // (it doesn't distinguish whether actually gapped). "@NP|WHNP < RRC=target <# NP|WHNP|NML|DT|S", "@ADVP < (@ADVP < (RB < /where$/)) < @SBAR=target", "NP < (NP $++ (SBAR=target !< (IN < /^(?i:than|that|whether)$/) !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !<: (S !< (VP < TO))) !$++ (CC $++ =target))"); /* * The "complementizer" grammatical relation is a discontinued grammatical relation. A * A complementizer of a clausal complement was the word introducing it. * It only matched "that" or "whether". We've now merged this in with "mark" which plays a similar * role with other clausal modifiers. *

*

* Example:
* "He says that you like to swim" → * complm(like, that) */ /** * The "marker" grammatical relation. A marker is the word introducing a finite clause subordinate to another clause. * For a complement clause, this will typically be "that" or "whether". * For an adverbial clause, the marker is typically a preposition like "while" or "although". *

* Example:
* "U.S. forces have been engaged in intense fighting after insurgents launched simultaneous attacks" →
   * mark(launched, after)
   */
  public static final GrammaticalRelation MARKER =
    new GrammaticalRelation(
        Language.English,
        "mark",
        "marker",
        MODIFIER,
        "SBAR(?:-TMP)?",
        tregexCompiler,
        // IN|DT with an S or FRAG somewhere to its right inside the SBAR.
        "SBAR|SBAR-TMP < (IN|DT=target $++ S|FRAG)",
        // "that"/"whether" in an SBAR that follows a verb, directly follows a noun,
        // sits under ADJP|PP, or sits under a coordinated NP|UCP|SBAR after a verb.
        "SBAR < (IN|DT=target < that|whether) [ $-- /^(?:VB|AUX)/ | $- NP|NN|NNS | > ADJP|PP | > (@NP|UCP|SBAR < CC|CONJP $-- /^(?:VB|AUX)/) ]");

  /**
   * The "adjectival modifier" grammatical relation. An adjectival
   * modifier of an NP is any adjectival phrase that serves to modify
   * the meaning of the NP.

*

* Example:
* "Sam eats red meat" → * amod(meat, red)

* The relation amod is also used for multiword country adjectives, despite their * questionable treebank representation. *

* Example:
* "the West German economy" → * amod(German, West), * amod(economy, German) */ public static final GrammaticalRelation ADJECTIVAL_MODIFIER = new GrammaticalRelation(Language.English, "amod", "adjectival modifier", MODIFIER, "NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP|ADJP", tregexCompiler, "/^(?:NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP)$/ < (ADJP|WHADJP|JJ|JJR|JJS|JJP|VBN|VBG|VBD|IN=target !< (QP !< /^[$]$/) !$- CC)", // IN above is needed for "next" in "next week" etc., which is often tagged IN. "ADJP !< CC|CONJP < (JJ|NNP $ JJ|NNP=target)", // Cover the case of "John, 34, works at Stanford" - similar to an expression for appos "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target <: CD $- /^,$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)"); /** * The "numeric modifier" grammatical relation. A numeric * modifier of an NP is any number phrase that serves to modify * the meaning of the NP.

*

* Example:
* "Sam eats 3 sheep" → * num(sheep, 3) */ public static final GrammaticalRelation NUMERIC_MODIFIER = new GrammaticalRelation(Language.English, "num", "numeric modifier", MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|ADJP|WHADJP|QP", tregexCompiler, "/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (CD|QP=target !$- CC)", // $ is so phrases such as "$ 100 million buyout" get amod(buyout, $) "/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (ADJP=target <: (QP !< /^[$]$/))", // Phrases such as $ 100 million get converted from (QP ($ $) (CD 100) (CD million)) to // (QP ($ $) (QP (CD 100) (CD million))). This next tregex covers those phrases. // Note that the earlier tregexes are usually enough to cover those phrases, such as when // the QP is by itself in an ADJP or NP, but sometimes it can have other siblings such // as in the phrase "$ 100 million or more". In that case, this next expression is needed. "QP < QP=target < /^[$]$/"); /** * The "compound number modifier" grammatical relation. A compound number * modifier is a part of a number phrase or currency amount. *

* Example:
* "I lost $ 3.2 billion" →
   * number($, billion)
   */
  public static final GrammaticalRelation NUMBER_MODIFIER =
    new GrammaticalRelation(
        Language.English,
        "number",
        "compound number modifier",
        MODIFIER,
        "QP|ADJP",
        tregexCompiler,
        // Node label pattern is written as CD, "$" or "#"; excluded right after a CC.
        "QP|ADJP < (/^(?:CD|$|#)$/=target !$- CC)");

  /**
   * The "quantifier phrase modifier" grammatical relation. A quantifier
   * modifier is an element modifying the head of a QP constituent.
   *

* Example:
* "About 200 people came to the party" →
   * quantmod(200, About)
   */
  public static final GrammaticalRelation QUANTIFIER_MODIFIER =
    new GrammaticalRelation(
        Language.English,
        "quantmod",
        "quantifier modifier",
        MODIFIER,
        "QP",
        tregexCompiler,
        // Any child of a QP carrying one of the listed tags.
        "QP < IN|RB|RBR|RBS|PDT|DT|JJ|JJR|JJS|XS=target");

  /**
   * The "noun compound modifier" grammatical relation. A noun compound
   * modifier of an NP is any noun that serves to modify the head noun.
   * Note that this has all nouns modify the rightmost a la Penn headship
   * rules. There is no intelligent noun compound analysis.
   *

* We eliminate nouns that are detected as part of a POS, since that * will turn into the dependencies denoting possession instead. * Note we have to include (VBZ < /^\'s$/) as part of the POS * elimination, since quite a lot of text such as * "yesterday's widely published sequester" was misannotated as a * VBZ instead of a POS. TODO: remove that if a revised PTB is ever * released. *

* Example:
* "Oil price futures" → * nn(futures, oil), * nn(futures, price)

*/ public static final GrammaticalRelation NOUN_COMPOUND_MODIFIER = new GrammaticalRelation(Language.English, "nn", "nn modifier", MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML|ADVP|ADJP)(?:-TMP|-ADV)?", tregexCompiler, "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < (NP|NML|NN|NNS|NNP|NNPS|FW|AFX=target $++ NN|NNS|NNP|NNPS|FW|CD=sister !<<- POS !<<- (VBZ < /^\'s$/) !$- /^,$/ !$++ (POS $++ =sister))", "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < JJ|JJR|JJS=sister < (NP|NML|NN|NNS|NNP|NNPS|FW=target !<<- POS !<<- (VBZ < /^\'s$/) $+ =sister) <# NN|NNS|NNP|NNPS !<<- POS !<<- (VBZ < /^\'s$/) ", // in vitro, in vivo, etc., in Genia // matches against "etc etc" "ADJP|ADVP < (FW [ $- (FW=target !< /^(?i:etc)$/) | $- (IN=target < in|In) ] )"); /* * There used to be a relation "abbrev" for when abbreviations were defined in brackets after a noun * phrase, like "the Australian Broadcasting Corporation (ABC)", but it has now been disbanded, and * subsumed under appos. */ /** * The "appositional modifier" grammatical relation. An appositional * modifier of an NP is an NP that serves to modify * the meaning of the NP. It includes parenthesized examples, as well as defining abbreviations. *

* Examples:
* "Sam, my brother, eats red meat" → * appos(Sam, brother)
* "Bill (John's cousin)" → appos(Bill, cousin). * * "The Australian Broadcasting Corporation (ABC)" → * appos(Corporation, ABC) */ public static final GrammaticalRelation APPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.English, "appos", "appositional modifier", MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?", tregexCompiler, "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target !<: CD $- /^,$/ $-- /^(?:WH)?NP/) !< CC|CONJP !< " + FW_ETC_PAT + " !< " + ETC_PAT, "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PRN=target < (NP < /^(?:NN|CD)/ $-- /^-LRB-$/ $+ /^-RRB-$/))", // NP-ADV is a npadvmod, NP-TMP is a tmod "@WHNP|NP < (NP=target !<: CD <, /^-LRB-$/ <` /^-RRB-$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)", // TODO: next pattern with NNP doesn't work because leftmost NNP is deemed head in a // structure like (NP (NNP Norway) (, ,) (NNP Verdens_Gang) (, ,)) "NP|NP-TMP|NP-ADV < (NNP $+ (/^,$/ $+ NNP=target)) !< CC|CONJP !< " + FW_ETC_PAT + " !< " + ETC_PAT, // find abbreviations // for biomedical English, the former NNP heuristic really doesn't work, because they use NN for all chemical entities // while not unfoolable, this version produces less false positives and more true positives. "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PRN=target <, /^-LRB-$/ <- /^-RRB-$/ !<< /^(?:POS|(?:WP|PRP)\\$|[,$#]|CC|RB|CD)$/ <+(NP) (NNP|NN < /^(?:[A-Z]\\.?){2,}/) )", // Handles cases such as "(NP (Her daughter) Jordan)" "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target <: NNP $- (/^(?:WH)?NP/ !< POS)) !< CC|CONJP !< " + FW_ETC_PAT + " !< " + ETC_PAT); /** * The "discourse element" grammatical relation. This is used for interjections and * other discourse particles and elements (which are not clearly linked to the structure * of the sentence, except in an expressive way). We generally follow the * guidelines of what the Penn Treebanks count as an INTJ. 
They * define this to include: interjections (oh, uh-huh, Welcome), fillers (um, ah), * and discourse markers (well, like, actually, but not: you know). * We also use it for emoticons. */ public static final GrammaticalRelation DISCOURSE_ELEMENT = new GrammaticalRelation(Language.English, "discourse", "discourse element", MODIFIER, ".*", tregexCompiler, "__ < (NFP=target [ < " + WESTERN_SMILEY + " | < " + ASIAN_SMILEY + " ] )", "__ [ < INTJ=target | < (PRN=target <1 /^(?:,|-LRB-)$/ <2 INTJ [ !<3 __ | <3 /^(?:,|-RRB-)$/ ] ) ]"); /** * The "verb modifier" grammatical relation. A verb * modifier of an NP, VP, or S is a S/VP[part] that serves to modify * the meaning of the NP or VP. *

* Examples:
* "truffles picked during the spring are tasty" → * vmod(truffles, picked)
* "Bill picked Fred for the team demonstrating his incompetence" → * vmod(picked, demonstrating)
* "points to establish are ..." → * vmod(points, establish)
* "who am i to judge" → * vmod(who, judge)
*/ public static final GrammaticalRelation VERBAL_MODIFIER = new GrammaticalRelation(Language.English, "vmod", "verb modifier", MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|VP|S|SINV|SBARQ", tregexCompiler, "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV|NML|NX < (VP=target < VBG|VBN|VBD $-- @NP|NML|NX)", // also allow VBD since it quite often occurs in treebank errors and parse errors // to get "MBUSA, headquartered ..." // Allows an adverb to come before the participle "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV|NML|NX < (/^,$/ $+ (VP=target [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ]))", // to get "John, knowing ..., announced " // allowing both VP=verb and VP <1 VP=verb catches // conjunctions of two VP clauses "S|SINV < (S=target (< VP=verb | < (VP <1 VP=verb)) [ $- (/^,$/ [ $- @NP | $- (@PP $ @NP) ] ) | $+ (/^,$/ $+ @NP) ] ) : (=verb [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ])", "(VP < (@S=target < (VP [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ]) $- (/^,$/ [$- @NP|VP | $- (@PP $-- @NP ) |$- (@ADVP $-- @NP)])))", // What are you wearing dancing tonight? "(VP < (S=target < (VP < VBG) $-- VBG=ing !$-- (/^[:]$/ $-- =ing)))", // We could use something like this keying off -ADV annotation, but not yet operational, as we don't keep S-ADV, only NP-ADV // "VP < (/^S-ADV$/=target < (VP <, VBG|VBN) )", // they wrote asking the SEC to ... "VP < (S=target $-- NP < (VP < TO) !$-- (/^V/ < " + xcompVerbRegex + ") )", "/^NP(?:-[A-Z]+)?$/ < (S=target < (VP < TO) $-- NP|NN|NNP|NNS)", "/^NP(?:-[A-Z]+)?$/ < (SBAR=target < (S < (VP < TO)) $-- NP|NN|NNP|NNS)", "SBARQ < WHNP < (S=target < (VP <1 TO))"); /** * The "adverbial modifier" grammatical relation. An adverbial * modifier of a word is a (non-clausal) RB or ADVP that serves to modify * the meaning of the word.

*

* Examples:
* "genetically modified food" → * advmod(modified, genetically)
* "less often" → * advmod(often, less) */ public static final GrammaticalRelation ADVERBIAL_MODIFIER = new GrammaticalRelation(Language.English, "advmod", "adverbial modifier", MODIFIER, "VP|ADJP|WHADJP|ADVP|WHADVP|S|SBAR|SINV|SQ|SBARQ|XS|(?:WH)?(?:PP|NP)(?:-TMP|-ADV)?|RRC|CONJP|JJP", tregexCompiler, "/^(?:VP|ADJP|JJP|WHADJP|SQ?|SBARQ?|SINV|XS|RRC|(?:WH)?NP(?:-TMP|-ADV)?)$/ < (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + " !< " + ETC_PAT + ")", // avoids adverb conjunctions matching as advmod; added JJ to catch How long // "!< no" so we can get neg instead for "no foo" when no is tagged as RB // we allow CC|CONJP as long as it is not between the target and the head // TODO: perhaps remove that last clause if we transform // more and more, less and less, etc. "ADVP|WHADVP < (RB|RBR|RBS|WRB|ADVP|WHADVP|JJ=target !< " + NOT_PAT + " !< /^(?i:no)$/ !< " + ETC_PAT + ") [ !< /^CC|CONJP$/ | ( <#__=head !< (/^CC|CONJP$/ [ ($++ =head $-- =target) | ($-- =head $++ =target) ])) ]", //this one gets "at least" advmod(at, least) or "fewer than" advmod(than, fewer) "SBAR < (WHNP=target < WRB)", "SBARQ <, WHADVP=target", "XS < JJ=target", // for PP, only ones before head, or after NP, since others afterwards are pcomp "/(?:WH)?PP(?:-TMP|-ADV)?$/ <# (__ $-- (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + " !< " + ETC_PAT + "))", "/(?:WH)?PP(?:-TMP|-ADV)?$/ < @NP|WHNP < (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + " !< " + ETC_PAT + ")", "CONJP < (RB=target !< " + NOT_PAT + " !< " + ETC_PAT + ")"); /** * The "negation modifier" grammatical relation. The negation modifier * is the relation between a negation word and the word it modifies. *

* Examples:
* "Bill is not a scientist" → * neg(scientist, not)
* "Bill doesn't drive" → * neg(drive, n't) */ public static final GrammaticalRelation NEGATION_MODIFIER = new GrammaticalRelation(Language.English, "neg", "negation modifier", ADVERBIAL_MODIFIER, "VP|ADJP|S|SBAR|SINV|SQ|NP(?:-TMP|-ADV)?|FRAG|CONJP|PP|NAC|NML|NX|ADVP|WHADVP", tregexCompiler, "/^(?:VP|NP(?:-TMP|-ADV)?|ADJP|SQ|S|FRAG|CONJP|PP)$/< (RB=target < " + NOT_PAT + ")", "VP|ADJP|S|SBAR|SINV|FRAG < (ADVP=target <# (RB < " + NOT_PAT + "))", "VP > SQ $-- (RB=target < " + NOT_PAT + ")", // the commented out parts were relevant for the "det", // but don't seem to matter for the "neg" relation "/^(?:NP(?:-TMP|-ADV)?|NAC|NML|NX|ADJP|ADVP)$/ < (DT|RB=target < /^(?i:no)$/ " + /* !$++ CC */ " $++ /^(?:N[MNXP]|CD|JJ|JJR|FW|ADJP|QP|RB|RBR|PRP(?![$])|PRN)/ " + /* =det !$++ (/^PRP[$]|POS/ $++ =det !$++ (/''/ $++ =det)) */ ")", // catches "no more", possibly others as well // !< CC|CONJP catches phrases such as "no more or less", which maybe should be preconj "ADVP|WHADVP < (RB|RBR|RBS|WRB|ADVP|WHADVP|JJ=target < /^(?i:no)$/) !< CC|CONJP"); /** * The "noun phrase as adverbial modifier" grammatical relation. * This relation captures various places where something syntactically a noun * phrase is used as an adverbial modifier in a sentence. These usages include: *

    *
   * <ul>
   * <li> A measure phrase, which is the relation between
   * the head of an ADJP/ADVP and the head of a measure-phrase modifying the ADJP/ADVP.
   * <br>
   * Example: <br>
   * "The director is 65 years old" →
   * npadvmod(old, years)
   * </li>
   * <li> Noun phrases giving extent inside a VP which are not objects
   * <br>
   * Example: <br>
   * "Shares eased a fraction" →
   * npadvmod(eased, fraction)
   * </li>
   * <li> Financial constructions involving an adverbial or PP-like NP, notably
   * the following construction where the NP means "per share"
   * <br>
   * Example: <br>
   * "IBM earned $ 5 a share" →
   * npadvmod($, share)
   * </li>
   * <li> Reflexives
   * <br>
   * Example: <br>
   * "The silence is itself significant" →
   * npadvmod(significant, itself)
   * </li>
   * <li> Certain other absolutive NP constructions.
   * <br>
   * Example: <br>
   * "90% of Australians like him, the most of any country" →
   * npadvmod(like, most)
   * </li>
   * </ul>
   *

* A temporal modifier (tmod) is a subclass of npadvmod which is distinguished * as a separate relation. */ public static final GrammaticalRelation NP_ADVERBIAL_MODIFIER = new GrammaticalRelation(Language.English, "npadvmod", "noun phrase adverbial modifier", MODIFIER, "VP|(?:WH)?(?:NP|ADJP|ADVP|PP)(?:-TMP|-ADV)?", tregexCompiler, "@ADVP|ADJP|WHADJP|WHADVP|PP|WHPP <# (JJ|JJR|IN|RB|RBR !< notwithstanding $- (@NP=target !< NNP|NNPS))", // one word nouns like "cost efficient", "ice-free" "@ADJP < (NN=target $++ /^JJ/) !< CC|CONJP", "@NP|WHNP < /^NP-ADV/=target", // Mr. Bush himself ..., in a couple different parse // patterns. Looking for CC|CONJP leaves out phrases such // as "he and myself" "@NP|WHNP [ < (NP=target <: (PRP < " + selfRegex + ")) | < (PRP=target < " + selfRegex + ") ] : (=target $-- NP|NN|NNS|NNP|NNPS|PRP=noun !$-- (/^,|CC|CONJP$/ $-- =noun))", // this next one is for weird financial listings: 4.7% three months "@NP <1 (@NP <<# /^%$/) <2 (@NP=target <<# days|month|months) !<3 __", "@VP < /^NP-ADV/=target"); /** * The "temporal modifier" grammatical relation. A temporal * modifier of a VP or an ADJP is any constituent that serves to modify the * meaning of the VP or the ADJP by specifying a time; a temporal modifier of a * clause is an temporal modifier of the VP which is the * predicate of that clause.

*

* Example:
* "Last night, I swam in the pool" → * {@code tmod}(swam, night) */ public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.English, "tmod", "temporal modifier", NP_ADVERBIAL_MODIFIER, "VP|S|ADJP|PP|SBAR|SBARQ|NP|RRC", tregexCompiler, "VP|ADJP|RRC [ < NP-TMP=target | < (VP=target <# NP-TMP !$ /^,|CC|CONJP$/) | < (NP=target <# (/^NN/ < " + timeWordRegex + ") !$+ (/^JJ/ < old)) ]", // CDM Jan 2010: For constructions like "during the same period last year" // combining expressions into a single disjunction should improve speed a little "@PP < (IN|TO|VBG|FW $++ (@NP [ $+ NP-TMP=target | $+ (NP=target <# (/^NN/ < " + timeWordRegex + ")) ]))", "S < (NP-TMP=target $++ VP $ NP )", "S < (NP=target <# (/^NN/ < " + timeWordRegex + ") $++ (NP $++ VP))", // matches when relative clauses as temporal modifiers of verbs! "SBAR < (@WHADVP < (WRB < when)) < (S < (NP $+ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) ))) !$-- CC $-- NP > NP=target", "SBARQ < (@WHNP=target <# (/^NN/ < " + timeWordRegex + ")) < (SQ < @NP)", "NP < NP-TMP=target"); /** * The "multi-word expression" grammatical relation. * This covers various multi-word constructions for which it would * seem pointless or arbitrary to claim grammatical relations between words: * as well as, rather than, instead of, but also; * such as, because of, all but, in addition to .... *

* Examples:
* "dogs as well as cats" → * mwe(well, as)
* mwe(well, as)

* "fewer than 700 bottles" → * mwe(than, fewer) */ public static final GrammaticalRelation MULTI_WORD_EXPRESSION = new GrammaticalRelation(Language.English, "mwe", "multi-word expression", MODIFIER, "PP|XS|ADVP|CONJP", tregexCompiler, "PP|XS < (IN|TO < as|of|at|to|in) < (JJ|IN|JJR|JJS|NN=target < such|because|Because|least|instead|due|Due|addition|to)", "ADVP < (RB|IN < well) < (IN|RB|JJS=target < as)", // TODO: perhaps the phrase "all but" is more like "all" and should have that as the head "ADVP < (DT=target < all) < (CC < but)", "CONJP < (RB < rather|well|instead) < (RB|IN=target < as|than|of)", "CONJP < (IN < in) < (NN|TO=target < addition|to)", // todo: note inconsistent head finding for "rather than"! "XS < JJR|JJS=target" // more than, fewer than, well over -- maybe change some of these? ); /* mihai: this block needs to be uncommented to get the KBP 2010 system to work (due to the cached sentences using old code) * (Note: in 2011, the measure phrase relation was collapsed into the scope of npadvmod, rather than being separated out.) ** * The "measure-phrase" grammatical relation. The measure-phrase is the relation between * the head of an ADJP/ADVP and the head of a measure-phrase modifying the ADJP/ADVP. *

* Example:
* "The director is 65 years old" → * measure(old, years) * public static final GrammaticalRelation MEASURE_PHRASE = new GrammaticalRelation(Language.English, "measure", "measure-phrase", MODIFIER, "ADJP|ADVP", tregexCompiler, new String[] { "ADJP <- JJ <, (NP=target !< NNP)", "ADVP|ADJP <# (JJ|IN $- NP=target)" }); */ // mihai: end block /** * The "determiner" grammatical relation. *

* Examples:
* "The man is here" → det(man,the)
* "Which man do you prefer?" → det(man,which)
* (The ADVP match is because sometimes "a little" or "every time" is tagged * as an AVDVP with POS tags straight under it.) */ public static final GrammaticalRelation DETERMINER = new GrammaticalRelation(Language.English, "det", "determiner", MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NAC|NML|NX|X|ADVP|ADJP", tregexCompiler, "/^(?:NP(?:-TMP|-ADV)?|NAC|NML|NX|X)$/ < (DT=target !< /^(?i:either|neither|both|no)$/ !$+ DT !$++ CC $++ /^(?:N[MNXP]|CD|JJ|FW|ADJP|QP|RB|PRP(?![$])|PRN)/=det !$++ (/^PRP[$]|POS/ $++ =det !$++ (/''/ $++ =det)))", "NP|NP-TMP|NP-ADV < (DT=target [ (< /^(?i:either|neither|both)$/ !$+ DT !$++ CC $++ /^(?:NN|NX|NML)/ !$++ (NP < CC)) | " + "(!< /^(?i:either|neither|both|no)$/ $++ CC $++ /^(?:NN|NX|NML)/) | " + "(!< /^(?i:no)$/ $++ (/^JJ/ !$+ /^NN/) !$++CC !$+ DT) ] )", // "NP|NP-TMP|NP-ADV < (RB=target $++ (/^PDT$/ $+ /^NN/))", // todo: This matches nothing. Was it meant to be a PDT rule for (NP almost/RB no/DT chairs/NNS)? "NP|NP-TMP|NP-ADV <<, PRP <- (NP|DT|RB=target <<- all|both|each)", // we all, them all; various structures "WHNP < (NP $-- (WHNP=target < WDT))", // testing against CC|CONJP avoids conflicts with preconj in // phrases such as "both foo and bar" // however, we allow WDT|WP to account for "what foo or bar" and "whatever foo or bar" "@WHNP|ADVP|ADJP < (/^(?:NP|NN|CD|RBS|JJ)/ $-- (DT|WDT|WP=target !< /^(?i:no)$/ [ ==WDT|WP | !$++ CC|CONJP ]))", "@NP < (/^(?:NP|NN|CD|RBS)/ $-- WDT|WP=target)"); /** * The "predeterminer" grammatical relation. *

* Example:
* "All the boys are here" &rarr; predet(boys,all)
   */
  public static final GrammaticalRelation PREDETERMINER =
    new GrammaticalRelation(Language.English, "predet", "predeterminer",
        MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?", tregexCompiler,
        "/^(?:(?:WH)?NP(?:-TMP|-ADV)?|NX|NAC|NML)$/ < (PDT|DT=target $+ /^(?:DT|WP\\$|PRP\\$)$/ $++ /^(?:NN|NX|NML)/ !$++ CC)",
        "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PDT|DT=target $+ DT $++ (/^JJ/ !$+ /^NN/)) !$++ CC",
        "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < PDT=target <- DT");

  /**
   * The "preconjunct" grammatical relation.
   * <p>
   * Example: <br>
* "Both the boys and the girls are here" → preconj(boys,both) */ public static final GrammaticalRelation PRECONJUNCT = new GrammaticalRelation(Language.English, "preconj", "preconjunct", MODIFIER, "S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR|NP(?:-TMP|-ADV)?", tregexCompiler, "NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ $++ CC)", "NP|NP-TMP|NP-ADV|NX|NML < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)", // This matches weird/wrong NP-internal preconjuncts where you get (NP PDT (NP NP CC NP)) or similar "NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ ) < (NP < CC)", "/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (PDT|DT|CC=target < /^(?i:either|neither|both)$/ $++ CC)", "/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)"); /** * The "possession" grammatical relation between the possessum and the possessor.

*

* Examples:
* "their offices" → * {@code poss}(offices, their)
* "Bill 's clothes" → * {@code poss}(clothes, Bill) */ public static final GrammaticalRelation POSSESSION_MODIFIER = new GrammaticalRelation(Language.English, "poss", "possession modifier", MODIFIER, "(?:WH)?(NP|ADJP|INTJ|PRN|NAC|NX|NML)(?:-.*)?", tregexCompiler, "/^(?:WH)?(?:NP|INTJ|ADJP|PRN|NAC|NX|NML)(?:-.*)?$/ < /^(?:WP\\$|PRP\\$)$/=target", // todo: possessive pronoun under ADJP needs more work for one case of (ADJP his or her own) // basic NP possessive: we want to allow little conjunctions in head noun (NP (NP ... POS) NN CC NN) but not falsely match when there are conjoined NPs. See tests. "/^(?:WH)?(?:NP|NML)(?:-.*)?$/ [ < (WHNP|WHNML|NP|NML=target [ < POS | < (VBZ < /^'s$/) ] ) !< (CC|CONJP $++ WHNP|WHNML|NP|NML) | < (WHNP|WHNML|NP|NML=target < (CC|CONJP $++ WHNP|WHNML|NP|NML) < (WHNP|WHNML|NP|NML [ < POS | < (VBZ < /^'s$/) ] )) ]", // handle a few too flat NPs // note that ' matches both ' and 's "/^(?:WH)?(?:NP|NML|NX)(?:-.*)?$/ < (/^NN|NP/=target $++ (POS=pos < /\'/ $++ /^NN/) !$++ (/^NN|NP/ $++ =pos))"); /** * The "possessive" grammatical relation. This is the relation given to * 's (or ' with plurals).

*

* Example:
* "John's book" &rarr;
   * possessive(John, 's)
   */
  public static final GrammaticalRelation POSSESSIVE_MODIFIER =
    new GrammaticalRelation(Language.English, "possessive", "possessive modifier",
        MODIFIER, "(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?", tregexCompiler,
        "/^(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?$/ < POS=target",
        "/^(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?$/ < (VBZ=target < /^'s$/)");

  /**
   * The "prepositional modifier" grammatical relation. A prepositional
   * modifier of a verb, adjective, or noun is any prepositional phrase that serves to modify
   * the meaning of the verb, adjective, or noun.
   * We also generate prep modifiers of PPs to account for treebank (PP PP PP) constructions
   * (from 1984 through 2002).
   * <p>
   * Examples: <br>
   * "I saw a cat in a hat" &rarr;
   * prep(cat, in) <br>
   * "I saw a cat with a telescope" &rarr;
   * prep(saw, with) <br>
* "He is responsible for meals" → * prep(responsible, for) */ public static final GrammaticalRelation PREPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.English, "prep", "prepositional modifier", MODIFIER, ".*", tregexCompiler, "/^(?:(?:WH)?(?:NP|ADJP|ADVP|NX|NML)(?:-TMP|-ADV)?|VP|NAC|SQ|FRAG|PRN|X|RRC)$/ < (WHPP|WHPP-TMP|PP|PP-TMP=target !$- (@CC|CONJP $- __)) !<- " + ETC_PAT + " !<- " + FW_ETC_PAT, "/^(?:(?:WH)?(?:NP|ADJP|ADVP|NX|NML)(?:-TMP|-ADV)?|VP|NAC|SQ|FRAG|PRN|X|RRC)$/ < (S=target <: WHPP|WHPP-TMP|PP|PP-TMP)", // only allow a PP < PP one if there is not a conj, verb, or other pattern that matches pcomp under it. Else pcomp "WHPP|WHPP-TMP|WHPP-ADV|PP|PP-TMP|PP-ADV < (WHPP|WHPP-TMP|WHPP-ADV|PP|PP-TMP|PP-ADV=target !$- IN|VBG|VBN|TO) !< @CC|CONJP", "S|SINV < (PP|PP-TMP=target !< SBAR) < VP|S", "SBAR|SBARQ < /^(?:WH)?PP/=target < S|SQ", "@NP < (@UCP|PRN=target <# @PP)"); /** * The "phrasal verb particle" grammatical relation. The "phrasal verb particle" * relation identifies phrasal verb.

*

* Example:
* "They shut down the station." &rarr;
   * prt(shut, down)
   */
  public static final GrammaticalRelation PHRASAL_VERB_PARTICLE =
    new GrammaticalRelation(Language.English, "prt", "phrasal verb particle",
        MODIFIER, "VP|ADJP", tregexCompiler,
        "VP < PRT=target",
        "ADJP < /^VB/ < RP=target");

  /**
   * The "parataxis" grammatical relation. Relation between the main verb of a sentence
   * and other sentential elements, such as a sentential parenthetical, a sentence after a ":" or a ";", when two
   * sentences are juxtaposed next to each other without any coordinator or subordinator, etc.
   * <p>
   * Examples: <br>
   * "The guy, John said, left early in the morning." &rarr; parataxis(left,said)
* " */ public static final GrammaticalRelation PARATAXIS = new GrammaticalRelation(Language.English, "parataxis", "parataxis", DEPENDENT, "S|VP", tregexCompiler, "VP < (PRN=target < S|SINV|SBAR)", // parenthetical "VP $ (PRN=target [ < S|SINV|SBAR | < VP < @NP ] )", // parenthetical // The next relation handles a colon between sentences // and similar punct such as -- // Sometimes these are lists, especially in the case of ";", // so we don't trigger if there is a CC|CONJP that occurs // anywhere other than the first child // First child can occur in rare circumstances such as // "But even if he agrees -- which he won't -- etc etc" "S|VP < (/^:$/ $+ /^S/=target) !<, (__ $++ CC|CONJP)", // two juxtaposed sentences; common in web materials (but this also matches quite a few wsj things) "@S < (@S|SBARQ $++ @S|SBARQ=target !$++ @CC|CONJP)", "@S|VP < (/^:$/ $-- /^V/ $+ @NP=target) !< @CONJP|CC" // sometimes CC cases are right node raising, etc. ); /** * The "goes with" grammatical relation. This corresponds to use of the GW (goes with) part-of-speech tag * in the recent Penn Treebanks. It marks partial words that should be combined with some other word.

*

* Example:
* "They come here with out legal permission." &rarr;
   * goeswith(out, with)
   */
  public static final GrammaticalRelation GOES_WITH =
    new GrammaticalRelation(Language.English, "goeswith", "goes with",
        MODIFIER, ".*", tregexCompiler,
        "__ < GW=target");

  /**
   * The "semantic dependent" grammatical relation has been
   * introduced as a supertype for the controlling subject relation.
   */
  // Unlike the relations above, this one is constructed without a source
  // pattern, compiler or target patterns.
  public static final GrammaticalRelation SEMANTIC_DEPENDENT =
    new GrammaticalRelation(Language.English, "sdep", "semantic dependent",
        DEPENDENT);

  /**
   * The "agent" grammatical relation. The agent of a passive VP
   * is the complement introduced by "by" and doing the action.
   * <p>
   * Example: <br>
* "The man has been killed by the police" → * agent(killed, police) */ public static final GrammaticalRelation AGENT = new GrammaticalRelation(Language.English, "agent", "agent", DEPENDENT); // TODO would be nice to have this set up automatically... /** * A list of GrammaticalRelation values. New GrammaticalRelations must be * added to this list (until we make this an enum!). * The GR recognizers are tried in the order listed. A taxonomic * relationship trumps an ordering relationship, but otherwise, the first * listed relation will appear in dependency output. Known ordering * constraints where both match include: *

    *
  • NUMERIC_MODIFIER < ADJECTIVAL_MODIFIER *
*/ @SuppressWarnings({"RedundantArrayCreation"}) private static final List values = Generics.newArrayList(Arrays.asList(new GrammaticalRelation[] { GOVERNOR, DEPENDENT, PREDICATE, AUX_MODIFIER, AUX_PASSIVE_MODIFIER, COPULA, CONJUNCT, COORDINATION, PUNCTUATION, ARGUMENT, SUBJECT, NOMINAL_SUBJECT, NOMINAL_PASSIVE_SUBJECT, CLAUSAL_SUBJECT, CLAUSAL_PASSIVE_SUBJECT, COMPLEMENT, OBJECT, DIRECT_OBJECT, INDIRECT_OBJECT, PREPOSITIONAL_OBJECT, PREPOSITIONAL_COMPLEMENT, CLAUSAL_COMPLEMENT, XCLAUSAL_COMPLEMENT, MARKER, RELATIVE, REFERENT, EXPLETIVE, ADJECTIVAL_COMPLEMENT, MODIFIER, ADV_CLAUSE_MODIFIER, TEMPORAL_MODIFIER, RELATIVE_CLAUSE_MODIFIER, NUMERIC_MODIFIER, ADJECTIVAL_MODIFIER, NOUN_COMPOUND_MODIFIER, APPOSITIONAL_MODIFIER, VERBAL_MODIFIER, ADVERBIAL_MODIFIER, NEGATION_MODIFIER, MULTI_WORD_EXPRESSION, DETERMINER, PREDETERMINER, PRECONJUNCT, POSSESSION_MODIFIER, POSSESSIVE_MODIFIER, PREPOSITIONAL_MODIFIER, PHRASAL_VERB_PARTICLE, SEMANTIC_DEPENDENT, AGENT, NUMBER_MODIFIER, QUANTIFIER_MODIFIER, NP_ADVERBIAL_MODIFIER, PARATAXIS, DISCOURSE_ELEMENT, GOES_WITH, })); // Cache frequently used views of the values list private static final List unmodifiableValues = Collections.unmodifiableList(values); private static final List synchronizedValues = Collections.synchronizedList(values); private static final List unmodifiableSynchronizedValues = Collections.unmodifiableList(values); public static final ReadWriteLock valuesLock = new ReentrantReadWriteLock(); // Map from English GrammaticalRelation short names to their corresponding // GrammaticalRelation objects public static final Map shortNameToGRel = new ConcurrentHashMap(); static { for (GrammaticalRelation gr : values(true)) { shortNameToGRel.put(gr.toString().toLowerCase(), gr); } } public static List values() { return values(false); } public static List values(boolean threadSafe) { return threadSafe? 
unmodifiableSynchronizedValues : unmodifiableValues; } public static Lock valuesLock() { return valuesLock.readLock(); } /** * This method is meant to be called when you want to add a relation * to the values list in a thread-safe manner. Currently, this method * is always used in preference to values.add() because we expect to * add new EnglishGrammaticalRelations very rarely, so the eased * concurrency seems to outweigh the fairly slight cost of thread-safe * access. * @param relation the relation to be added to the values list */ private static void threadSafeAddRelation(GrammaticalRelation relation) { valuesLock.writeLock().lock(); try { // try-finally structure taken from Javadoc code sample for ReentrantReadWriteLock synchronizedValues.add(relation); shortNameToGRel.put(relation.toString(), relation); } finally { valuesLock.writeLock().unlock(); } } // the exhaustive list of conjunction relations private static final Map conjs = Generics.newConcurrentHashMap(); public static Collection getConjs() { return conjs.values(); } /** * The "conj" grammatical relation. Used to collapse conjunct relations. * They will be turned into conj_word, where "word" is a conjunction. 
* * @param conjunctionString The conjunction to make a GrammaticalRelation out of * @return A grammatical relation for this conjunction */ public static GrammaticalRelation getConj(String conjunctionString) { GrammaticalRelation result = conjs.get(conjunctionString); if (result == null) { synchronized(conjs) { result = conjs.get(conjunctionString); if (result == null) { result = new GrammaticalRelation(Language.English, "conj", "conj_collapsed", CONJUNCT, conjunctionString); conjs.put(conjunctionString, result); threadSafeAddRelation(result); } } } return result; } // the exhaustive list of preposition relations private static final Map preps = Generics.newConcurrentHashMap(); private static final Map prepsC = Generics.newConcurrentHashMap(); public static Collection getPreps() { return preps.values(); } public static Collection getPrepsC() { return prepsC.values(); } /** * The "prep" grammatical relation. Used to collapse prepositions.

* They will be turned into prep_word, where "word" is a preposition
   *
   * @param prepositionString The preposition to make a GrammaticalRelation out of
   * @return A grammatical relation for this preposition
   */
  public static GrammaticalRelation getPrep(String prepositionString) {
    // Fast path: relation already cached, no locking needed.
    GrammaticalRelation cached = preps.get(prepositionString);
    if (cached != null) {
      return cached;
    }
    synchronized (preps) {
      // Look again under the lock in case another thread created it meanwhile.
      GrammaticalRelation existing = preps.get(prepositionString);
      if (existing != null) {
        return existing;
      }
      GrammaticalRelation created = new GrammaticalRelation(Language.English, "prep", "prep_collapsed", PREPOSITIONAL_MODIFIER, prepositionString);
      preps.put(prepositionString, created);
      threadSafeAddRelation(created);
      return created;
    }
  }

  /**
   * The "prepc" grammatical relation. Used to collapse preposition
   * complements.
   * <p>
* They will be turned into prep_word, where "word" is a preposition * * @param prepositionString The preposition to make a GrammaticalRelation out of * @return A grammatical relation for this preposition */ public static GrammaticalRelation getPrepC(String prepositionString) { GrammaticalRelation result = prepsC.get(prepositionString); if (result == null) { synchronized(prepsC) { result = prepsC.get(prepositionString); if (result == null) { result = new GrammaticalRelation(Language.English, "prepc", "prepc_collapsed", DEPENDENT, prepositionString); prepsC.put(prepositionString, result); threadSafeAddRelation(result); } } } return result; } /** * Returns the EnglishGrammaticalRelation having the given string * representation (e.g. "nsubj"), or null if no such is found. * * @param s The short name of the GrammaticalRelation * @return The EnglishGrammaticalRelation with that name */ public static GrammaticalRelation valueOf(String s) { return GrammaticalRelation.valueOf(s, synchronizedValues); // // TODO does this need to be changed? // // modification NOTE: do not commit until go-ahead // // If this is a collapsed relation (indicated by a "_" separating // // the type and the dependent, instantiate a collapsed version. // // Currently handcode against conjunctions and prepositions, but // // should do this in a more robust fashion. // String[] tuples = s.trim().split("_", 2); // if (tuples.length == 2) { // String reln = tuples[0]; // String specific = tuples[1]; // if (reln.equals(PREPOSITIONAL_MODIFIER.getShortName())) { // return getPrep(specific); // } else if (reln.equals(CONJUNCT.getShortName())) { // return getConj(specific); // } // } // // return null; } /** * Returns an EnglishGrammaticalRelation based on the argument. * It works if passed a GrammaticalRelation or the String * representation of one (e.g. "nsubj"). It returns null * for other classes or if no string match is found. 
*
   * @param o A GrammaticalRelation or String
   * @return The EnglishGrammaticalRelation with that name
   */
  @SuppressWarnings("unchecked")
  public static GrammaticalRelation valueOf(Object o) {
    // Already a relation: hand it back unchanged.
    if (o instanceof GrammaticalRelation) {
      return (GrammaticalRelation) o;
    }
    // A string is resolved through the String overload.
    if (o instanceof String) {
      return valueOf((String) o);
    }
    // Anything else (including null) has no corresponding relation.
    return null;
  }

  /**
   * Prints out the English grammatical relations hierarchy.
   * See EnglishGrammaticalStructure for a main method that
   * will print the grammatical relations of a sentence or tree.
   *
   * @param args Args are ignored.
   */
  public static void main(String[] args) {
    String hierarchy = DEPENDENT.toPrettyString();
    System.out.println(hierarchy);
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy