All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.trees.international.pennchinese.UniversalChineseGrammaticalRelations Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
// Stanford Dependencies - Code for producing and using Stanford dependencies.
// Copyright © 2005-2014 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    [email protected]
//    http://nlp.stanford.edu/software/stanford-dependencies.shtml

package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.international.Language;

import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT;

/**
 * UniversalChineseGrammaticalRelations is a
 * set of {@link GrammaticalRelation} objects for the Chinese language.
 * Examples are from CTB_001.fid
 *
 * TODO(pliang): need to take some of these relations and move them into a
 * Universal Stanford Dependencies class (e.g., dep, arg, mod).
 * Currently, we have an external data structure that stores information about
 * whether a relation is universal or not, but that should probably be moved
 * into GrammaticalRelation.
 *
 * TODO(pliang): add an option to produce trees which use only the USD
 * relations rather than the more specialized Chinese ones.
 *
 * @author Galen Andrew
 * @author Pi-Chuan Chang
 * @author Huihsin Tseng
 * @author Marie-Catherine de Marneffe
 * @author Percy Liang
 * @author Peng Qi
 * @see edu.stanford.nlp.trees.GrammaticalStructure
 * @see GrammaticalRelation
 * @see UniversalChineseGrammaticalStructure
 */
public class UniversalChineseGrammaticalRelations {

  /**
   * Private constructor: this class is just a holder for static members
   * that act a bit like an enum, so code outside the class cannot
   * create an instance of it.
   */
  private UniversalChineseGrammaticalRelations() {}

  // Shared compiler that is handed to the GrammaticalRelation constructors
  // below together with their tregex pattern strings.
  // By setting the HeadFinder to null, we find out right away at
  // runtime if we have incorrectly set the HeadFinder for the
  // dependency tregexes.
  private static final TregexPatternCompiler tregexCompiler = new TregexPatternCompiler((HeadFinder) null);


  // Tregex node-label regexes (the surrounding slashes delimit the regex).
  // The patterns in this class use regexes such as /^NP/ as prefix tests, i.e.
  // the regex is searched for inside the label, so a pattern only matches a
  // whole word when BOTH anchors apply to EVERY alternative.

  // NOTE(review): left byte-identical on purpose. As written it matches any
  // label that starts or ends with a comma; both alternatives are the same
  // character here, which may be an encoding artifact (the second one was
  // possibly a full-width comma) -- confirm the intended character before
  // grouping this pattern like the two below.
  private static final String COMMA_PATTERN = "/^,|,$/";

  // Modal verbs. The alternation is wrapped in one group so that ^ and $
  // anchor every alternative. Previously ^ applied only to the first
  // alternative and $ only to the last, so e.g. any verb merely containing
  // 能 or 应 was treated as a modal, which made the optional suffixes
  // (够?, 该?, 要?) meaningless.
  private static final String MODAL_PATTERN = "/^(可(以|能)?|能够?|应该?|将要?|必须|会)$/";

  // Location nouns (compass direction + side word, or "nearby"/"beside").
  // Grouped for the same reason as MODAL_PATTERN: without the outer group
  // the middle alternatives matched as substrings of arbitrary words.
  private static final String LOCATION_NOUNS = "/^((东|西|南|北)(边|侧|部|岸|麓|畔)|附近|近?旁|旁?边)$/";

  /**
   * Returns an unmodifiable view of the grammatical relations held by this class.
   * Note: callers cannot modify the returned list, but the underlying list
   * can still be modified by others, so you should still acquire the lock
   * returned by {@code valuesLock()} before iterating over it.
   *
   * @return A read-only list of grammatical relations
   */
  public static List<GrammaticalRelation> values() {
    return Collections.unmodifiableList(values);
  }

  // Read/write lock for the list exposed through values().
  private static final ReadWriteLock valuesLock = new ReentrantReadWriteLock();

  /**
   * Returns the read side of the lock associated with {@code values()}.
   * The Javadoc of {@code values()} asks callers to hold this lock while
   * iterating over the list.
   *
   * @return The read lock of the class-wide {@link ReadWriteLock}
   */
  public static Lock valuesLock() {
    return valuesLock.readLock();
  }


  /**
   * Looks up a relation among this class's relations by delegating to
   * {@code GrammaticalRelation.valueOf}, passing the list from
   * {@code values()} and the read lock from {@code valuesLock()}.
   *
   * @param s The string identifying the relation (presumably its short
   *          name -- confirm against {@code GrammaticalRelation.valueOf})
   * @return Whatever {@code GrammaticalRelation.valueOf} returns for {@code s}
   */
  public static GrammaticalRelation valueOf(String s) {
    final List<GrammaticalRelation> knownRelations = values();
    final Lock readLock = valuesLock();
    return GrammaticalRelation.valueOf(s, knownRelations, readLock);
  }

  ////////////////////////////////////////////////////////////
  // ARGUMENT relations
  ////////////////////////////////////////////////////////////

  /**
   * The "argument" (arg) grammatical relation (abstract).
   * Arguments are required by their heads.
   * It is declared directly under {@code DEPENDENT} and carries no tregex patterns.
   */
  public static final GrammaticalRelation ARGUMENT =
    new GrammaticalRelation(Language.UniversalChinese, "arg", "argument", DEPENDENT);

  /**
   * The "subject" (subj) grammatical relation (abstract).
   * It is declared under {@code ARGUMENT} and carries no tregex patterns.
   */
  public static final GrammaticalRelation SUBJECT =
    new GrammaticalRelation(Language.UniversalChinese, "subj", "subject", ARGUMENT);

  /**
   * The "nominal subject" (nsubj) grammatical relation.  A nominal subject is
   * a subject which is a noun phrase.
   * 

* *

   * Input:
   *   (ROOT
   *     (IP
   *       (NP
   *         (NP (NR 上海) (NR 浦东))
   *         (NP (NN 开发)
   *           (CC 与)
   *           (NN 法制) (NN 建设)))
   *       (VP (VV 同步))))
   * Output:
   *   nsubj(同步, 建设)
   *
   * 
* */ public static final GrammaticalRelation NOMINAL_SUBJECT = new GrammaticalRelation(Language.UniversalChinese, "nsubj", "nominal subject", SUBJECT, "IP|NP", tregexCompiler, "IP <( ( NP|QP=target!< NT ) $+ ( /^VP|VCD|IP/ !< VE ! * *
   * Input:
   *   (IP
   *     (NP (NN 镍))
   *     (VP (SB 被)
   *       (VP (VV 称作)
   *         (NP (PU “)
   *           (DNP
   *             (NP
   *               (ADJP (JJ 现代))
   *               (NP (NN 工业)))
   *             (DEG 的))
   *           (NP (NN 维生素))
   *           (PU ”)))))
   * Output:
   *   nsubjpass(称作-3, 镍-1)
   * 
*
*/
  public static final GrammaticalRelation NOMINAL_PASSIVE_SUBJECT =
    new GrammaticalRelation(Language.UniversalChinese,
      "nsubjpass", "nominal passive subject",
      NOMINAL_SUBJECT, "IP", tregexCompiler,
      "IP < (NP=target $+ (VP|IP < SB|LB))");

  /**
   * The "clausal subject" grammatical relation.  A clausal subject is
   * a subject which is a clause.
   *

Examples: * *

   * 
* *

* Note: This one might not exist in Chinese, or is very rare.
   * cdm 2016: There are a few CP-SBJ in the CTB like this one:
   *   我 估计 [CP-SBJ 他 欺负 别人 的 ] 多
   * but it doesn't seem like there would be any way to detect them without using -SBJ
   */
  public static final GrammaticalRelation CLAUSAL_SUBJECT =
    new GrammaticalRelation(Language.UniversalChinese,
      "csubj", "clausal subject",
      SUBJECT, "IP|VP", tregexCompiler,
      "IP|VP < ( /^IP(-SBJ)?/ < NP|QP|LCP $+ VP=target )",
      "IP|VP < ( /^IP(-SBJ)?/ < NP|QP|LCP $+ (PU $+ VP=target ))");

  /**
   * The "complement" (comp) grammatical relation.
   */
  public static final GrammaticalRelation COMPLEMENT =
    new GrammaticalRelation(Language.UniversalChinese, "comp", "complement", ARGUMENT);

  /**
   * The "object" (obj) grammatical relation.
   */
  public static final GrammaticalRelation OBJECT =
    new GrammaticalRelation(Language.UniversalChinese, "obj", "object", COMPLEMENT);

  /**
   * The "direct object" (dobj) grammatical relation.
   *

* *

   * Input:
   *   (IP
   *     (NP (NR 上海) (NR 浦东))
   *     (VP
   *       (VCD (VV 颁布) (VV 实行))
   *            (AS 了)
   *            (QP (CD 七十一)
   *                (CLP (M 件)))
   *            (NP (NN 法规性) (NN 文件))))
   *
   *   In recent years Shanghai 's Pudong has promulgated and implemented
   *   some regulatory documents.
   * Output:
   *   dobj(颁布, 文件)
   * 
* */ public static final GrammaticalRelation DIRECT_OBJECT = new GrammaticalRelation(Language.UniversalChinese, "dobj", "direct object", OBJECT, "CP|VP", tregexCompiler, "VP < ( /^V*/ $+ NP|DP=target ) !< VC ", // 进入/VV 了/AS 夏季/NN "VP < ( /^V*/ $+ (AS $+ NP|DP=target) ) !< VC ", " VP < ( /^V*/ $+ NP|DP=target ! $+ NP|DP) !< VC ", "CP < (IP $++ NP=target ) !<< VC"); /** * The "indirect object" (iobj) grammatical relation. */ public static final GrammaticalRelation INDIRECT_OBJECT = new GrammaticalRelation(Language.UniversalChinese, "iobj", "indirect object", OBJECT, "VP", tregexCompiler, " CP !> VP < ( VV $+ ( NP|DP|QP|CLP=target . NP|DP ) )"); /** * The "clausal complement" (ccomp) grammatical relation. *

* *

   * Input:
   *   (IP
   *       (VP
   *         (VP
   *           (ADVP (AD 一))
   *           (VP (VV 出现)))
   *         (VP
   *           (ADVP (AD 就))
   *           (VP (SB 被)
   *             (VP (VV 纳入)
   *               (NP (NN 法制) (NN 轨道)))))))))))
   * Output:
   *   ccomp(出现, 纳入)
   * 
* */ public static final GrammaticalRelation CLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "ccomp", "clausal complement", COMPLEMENT, "VP|ADJP|IP", tregexCompiler, " VP < (VV|VC|VRD|VCD|VSB|VE $++ IP|VP|VRD|VCD|VSB|CP=target) !< NP|QP|LCP > IP|VP ", "VP < (VV $+ NP $++ IP=target)"); // " VP|IP < ( VV|VC|VRD|VCD !$+ NP|QP|LCP ) > (IP < IP|VP|VRD|VCD=target) " // "VP < (S=target < (VP !<, TO|VBG) !$-- NP)", /** * The "xclausal complement" (xcomp) grammatical relation. */ // pichuan: this is difficult to recognize in Chinese. // remove the rules since it (always) collides with ccomp public static final GrammaticalRelation XCLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "xcomp", "xclausal complement", COMPLEMENT, "VP", tregexCompiler, // fixme [pengqi 2016]: this is just a temporary solution to deal with VV $+ VP structures // that are clearly not aux:modal "VP < (VV=target $+ VP !< " + MODAL_PATTERN + ")" ); //////////////////////////////////////////////////////////// // MODIFIER relations //////////////////////////////////////////////////////////// /** * The "modifier" (mod) grammatical relation (abstract). */ public static final GrammaticalRelation MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "mod", "modifier", DEPENDENT); /** * The "number modifier" (nummod) grammatical relation. *

*

   * 
   * Input:
   *   (NP
   *         (NP (NN 拆迁) (NN 工作))
   *         (QP (CD 若干))
   *         (NP (NN 规定)))
   * Output:
   *   nummod(规定-48, 若干-47)
   * 
   * 
*/ public static final GrammaticalRelation NUMERIC_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nummod", "numeric modifier", MODIFIER, "QP|NP|DP", tregexCompiler, "NP|QP < ( QP =target << M $++ NN|NP|QP)", "NP|QP < ( DNP=target < (QP < CD !< OD) !< JJ|ADJP $++ NP|QP )" // the following rule is merged into mark:clf //"DP < ( DT $+ CLP=target )" ); /** * The "appositional modifier" (appos) grammatical relation (abstract). */ public static final GrammaticalRelation APPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "appos", "appositional modifier", MODIFIER, "NP", tregexCompiler, "NP < (/^NP(-APP)?$/=target !<<- " + LOCATION_NOUNS + " !< NT !<: NR $+ (NP <: NR !$+ __))"); public static final GrammaticalRelation PARATAXIS = new GrammaticalRelation(Language.UniversalChinese, "parataxis", "parataxis", DEPENDENT); /** * The "parenthetical modifier" (prnmod) grammatical relation (Chinese-specific). */ public static final GrammaticalRelation PARENTHETICAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "parataxis:prnmod", "parenthetical modifier", PARATAXIS, "NP", tregexCompiler, "NP < PRN=target "); /** * The "noun modifier" grammatical relation. */ public static final GrammaticalRelation NOUN_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod", "noun modifier", MODIFIER, "NP", tregexCompiler, "NP < (NP=target < NR !$+ PU|CC|NP|NN $++ NP|PRN)", "NP < (NP=target $+ (NP <: NR)) [$- P|LC | $+ P|LC]", "NP|QP < ( DNP =target < (NP < NT) $++ NP|QP )", "NP|QP < ( DNP =target < LCP|PP $++ NP|QP )"); /** * The "range" grammatical relation (Chinese only). The indirect * object of a VP is the quantifier phrase which is the (dative) object * of the verb.

*

* *

   * Input:
   *   (VP (VV 成交)
   *       (NP (NN 药品))
   *       (QP (CD 一亿多)
   *           (CLP (M 元))))
   * Output:
   *   range(成交, 元)
   * 
* */ public static final GrammaticalRelation RANGE = new GrammaticalRelation(Language.UniversalChinese, "nmod:range", "range", NOUN_MODIFIER, "VP", tregexCompiler, "VP < ( NP|DP|QP $+ DP|QP=target)", "VP < ( VV $+ QP=target )"); public static final GrammaticalRelation POSSESSIVE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod:poss", "possessive modifier", NOUN_MODIFIER, "NP", tregexCompiler, "NP < (PN=target $+ NN)"); /** * The "temporal modifier" grammatical relation. * (IP * (VP * (NP (NT 以前)) * (ADVP (AD 不)) * (ADVP (AD 曾)) * (VP (VV 遇到) (AS 过)))) *(VP * (LCP * (NP (NT 近年)) * (LC 来)) * (VP * (VCD (VV 颁布) (VV 实行)) * {@code tmod } (遇到, 以前) */ public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod:tmod", "temporal modifier", NOUN_MODIFIER, "VP|IP", tregexCompiler, "VP|IP < (NP=target < NT $++ VP)"); /* This rule actually matches nothing. There's another tmod rule. This is removed for now. (pichuan) Sun Mar 8 18:22:40 2009 */ /* public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "tmod", "temporal modifier", MODIFIER, "VP|IP|ADJP", tregexCompiler, new String[]{ " VC|VE ! >> VP|ADJP < NP=target < NT", "VC|VE !>>IP <( NP=target < NT $++ VP !< VC|VE )" }); */ public static final GrammaticalRelation CLAUSAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "acl", "clausal modifier of noun", MODIFIER, "NP", tregexCompiler, //"NP $++ (CP=target << VV) > NP ", "NP < ( CP=target $++ NP << VV)", "NP < IP=target "); /** * The "relative clause modifier" (relcl) grammatical relation. *

*

   * 
   * Input:
   *   (NP-PRD (CP (WHNP-3 (-NONE- *OP*))
   *       (CP (IP (NP-SBJ (-NONE- *pro*))
   *         (VP (NP-TMP (NT 以前))
   *             (ADVP (AD 不))
   *             (ADVP (AD 曾))
   *             (VP (VV 遇到)
   *           (AS 过)
   *           (NP-OBJ (-NONE- *T*-3)))))
   *           (DEC 的)))
   *         (NP (NP (ADJP (JJ 新))
   *           (NP (NN 情况)))
   *       (PU 、)
   *       (NP (ADJP (JJ 新))
   *           (NP (NN 问题)))))
   *   The new problem that has not been encountered.
   * Output:
   *   relcl(问题, 遇到)
   * 
   * 
*/ /* merged into acl public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "acl", "adjectival clause modifier", CLAUSAL_MODIFIER, "NP", tregexCompiler, "NP $++ (CP=target << VV) > NP ", "NP < ( CP=target $++ NP << VV)", "NP < IP=target "); */ /* * The "non-finite clause" grammatical relation. * This used to be verb modifier (vmod). */ /* merged into acl public static final GrammaticalRelation NONFINITE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "acl:nfincl", "non-finite clause modifier (examples: stores[head] based[modifier] in Boston", CLAUSAL_MODIFIER, "NP", tregexCompiler, "NP < IP=target "); */ /** * The "adjective modifier" (amod) grammatical relation. *

*

   * 
   * Input:
   *   (NP
   *     (ADJP (JJ 新))
   *     (NP (NN 情况)))
   * Output:
   *   amod(情况-34, 新-33)
   * 
   * 
*/ public static final GrammaticalRelation ADJECTIVAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "amod", "adjectival modifier", MODIFIER, "NP|CLP|QP", tregexCompiler, "NP|CLP|QP < (ADJP=target $++ NP|CLP|QP ) ", "NP $++ (CP=target << VA !<< VV) > NP ", "NP < ( CP=target $++ NP << VA !<< VV)", "NP|QP < ( DNP=target < JJ|ADJP !< NP|QP $++ NP|QP )"); /** * The "ordinal modifier" (ordmod) grammatical relation. */ public static final GrammaticalRelation ORDINAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "amod:ordmod", "ordinal numeric modifier", ADJECTIVAL_MODIFIER, "NP|QP", tregexCompiler, "NP < (QP=target < OD !< CLP)", "NP|QP < ( DNP=target < (QP < OD !< CD) !< JJ|ADJP $++ NP|QP )" // the following rule is merged into mark:clf //"QP < (OD=target $+ CLP)" ); /** * The "determiner modifier" (det) grammatical relation. *

*

   * 
   * Input:
   *   (NP (DP (DT 这些))
   *       (NP (NN 经济) (NN 活动)))
   * Output:
   *   det(活动-61, 这些-59)
   * 
   * 
*/ public static final GrammaticalRelation DETERMINER = new GrammaticalRelation(Language.UniversalChinese, "det", "determiner", MODIFIER, "^NP|DP", tregexCompiler, "/^NP/ < (DP=target $++ NP )" //"DP < DT < QP=target" ); /** * The "negative modifier" (neg) grammatical relation. *

*

   * 
   * Input:
   *   (VP
   *     (NP (NT 以前))
   *     (ADVP (AD 不))
   *     (ADVP (AD 曾))
   *     (VP (VV 遇到) (AS 过))))
   * Output:
   *   neg(遇到-30, 不-28)
   * 
   * 
*/
  public static final GrammaticalRelation NEGATION_MODIFIER =
    new GrammaticalRelation(Language.UniversalChinese,
      "neg", "negation modifier",
      MODIFIER, "VP|ADJP|IP", tregexCompiler,
      "VP|ADJP|IP < (AD|VV=target < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/)",
      "VP|ADJP|IP < (ADVP|VV=target < (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))");

  /**
   * The "adverbial modifier" (advmod) grammatical relation.
   *

*

   * 
   * Input:
   *   (VP
   *     (ADVP (AD 基本))
   *     (VP (VV 做到) (AS 了)
   * Output:
   *   advmod(做到-74, 基本-73)
   * 
   * 
*/
  public static final GrammaticalRelation ADVERBIAL_MODIFIER =
    new GrammaticalRelation(Language.UniversalChinese,
      "advmod", "adverbial modifier",
      MODIFIER, "VP|ADJP|IP|CP|PP|NP|QP", tregexCompiler,
      "VP|ADJP|IP|CP|PP|NP < (ADVP=target !< (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))",
      "VP|ADJP < AD|CS=target",
      "QP < (ADVP=target $+ QP)",
      "QP < ( QP $+ ADVP=target)");

  // The "advcl" relation; declared without tregex patterns.
  public static final GrammaticalRelation ADV_CLAUSAL_MODIFIER =
    new GrammaticalRelation(Language.UniversalChinese,
      "advcl", "clausal adverb",
      ADVERBIAL_MODIFIER);

  /**
   * The "dvp modifier" grammatical relation.
   *

* *

   * Input:
   *   (VP (DVP
   *         (VP (VA 简单))
   *         (DEV 的))
   *       (VP (VV 采取) ...))
   * Output:
   *   dvpmod(采取-9, 简单-7)
   * 
* */ public static final GrammaticalRelation DVPM_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "advmod:dvp", "dvp modifier", ADVERBIAL_MODIFIER, "VP", tregexCompiler, " VP < ( DVP=target $+ VP) "); //////////////////////////////////////////////////////////// // Special clausal dependents //////////////////////////////////////////////////////////// /** * The "auxiliary" (aux) grammatical relation. */ public static final GrammaticalRelation AUX_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "aux", "auxiliary (example: should[modifier] leave[head])", DEPENDENT, "VP", tregexCompiler ); /** * The "modal" grammatical relation. * (IP * (NP (NN 利益)) * (VP (VV 能) * (VP (VV 得到) * (NP (NN 保障))))))))) * mmod (得到-64, 能-63) */ public static final GrammaticalRelation MODAL_VERB = new GrammaticalRelation(Language.UniversalChinese, "aux:modal", "modal verb", AUX_MODIFIER, "VP", tregexCompiler, // todo [pengqi]: using MODAL_PATTERN would render many cases of VV $+ VP // as dep, need to assign a type to that structure. Also in that case // need to clarify which verb is the head "VP < ( VV=target < " + MODAL_PATTERN + " !< /^没有$/ $+ VP|VRD )"); /** * The "aspect marker" grammatical relation. * (VP * (ADVP (AD 基本)) * (VP (VV 做到) (AS 了) * asp (做到,了) */ public static final GrammaticalRelation ASPECT_MARKER = new GrammaticalRelation(Language.UniversalChinese, "aux:asp", "aspect", AUX_MODIFIER, "VP", tregexCompiler, "VP < ( /^V*/ $+ AS=target)"); /** * The "auxiliary passive" (auxpass) grammatical relation. */ public static final GrammaticalRelation AUX_PASSIVE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "auxpass", "auxiliary passive", MODIFIER, "VP", tregexCompiler, "VP < SB|LB=target"); /** * The "copula" grammatical relation. *

*

   * 
   * Input:
   *   (IP (NP (NR 浦东))
   *       (VP (VC 是)
   *           (NP (NN 工程)))))
   * Output (formerly reverse(attr)):
   *   cop(工程,是)
   * 
   * 
*/
  public static final GrammaticalRelation COPULA =
    new GrammaticalRelation(Language.UniversalChinese,
      "cop", "copula",
      DEPENDENT, "VP", tregexCompiler,
      " VP < VC=target");

  /**
   * The "marker" (mark) grammatical relation.  A marker is the word
   * introducing a finite clause subordinate to another clause.
   *

*

   * 
   * Input:
   *   (PP (P 因为)
   *       (IP
   *         (VP
   *           (VP
   *             (ADVP (AD 一))
   *             (VP (VV 开始)))
   *           (VP
   *             (ADVP (AD 就))
   *             (ADVP (AD 比较))
   *             (VP (VA 规范))))))
   * Output (formerly reverse(pccomp)):
   *   mark(开始-20,因为-18)
   *
   * Input:
   *   (LCP (IP (NP-SBJ (-NONE- *pro*))
   *     (VP (VV 积累) (AS 了) (NP-OBJ (NN 经验)))) (LC 以后))
   * Output (formerly reverse(lccomp)):
   *   mark(积累, 以后)
   *
   * Input:
   *   (CP
   *         (IP
   *           (VP
   *             (VP (VV 振兴)
   *               (NP (NR 上海)))
   *             (PU ,)
   *             (VP (VV 建设)
   *               (NP
   *                 (NP (NN 现代化))
   *                 (NP (NN 经济) (PU 、) (NN 贸易) (PU 、) (NN 金融))
   *                 (NP (NN 中心))))))
   *         (DEC 的))
   * Output (formerly cpm):
   *   mark(振兴, 的)
   *
   * Input:
   *   (DVP
   *     (VP (VA 简单))
   *     (DEV 的))
   * Output (formerly dvpm):
   *   mark(简单-7, 的-8)
   * 
   * 
*/ public static final GrammaticalRelation MARK = new GrammaticalRelation(Language.UniversalChinese, "mark", "marker (examples: that[modifier] expanded[head]; 开发/expand[head] 浦东/Pudong 的[modifier])", DEPENDENT, "^PP|^LCP|^CP|^DVP", tregexCompiler, "/^PP/ < (P=target $+ VP)", "/^LCP/ < (P=target $+ VP)", "/^CP/ < (__ $++ DEC=target)", "DVP < (__ $+ DEV=target)"); /** * The "punctuation" grammatical relation. This is used for any piece of * punctuation in a clause, if punctuation is being retained in the * typed dependencies. */ public static final GrammaticalRelation PUNCTUATION = new GrammaticalRelation(Language.UniversalChinese, "punct", "punctuation", DEPENDENT, ".*", tregexCompiler, "__ < PU=target"); //////////////////////////////////////////////////////////// // Other (compounding, coordination) //////////////////////////////////////////////////////////// /** * The "compound" grammatical relation (abstract). */ public static final GrammaticalRelation COMPOUND = new GrammaticalRelation(Language.UniversalChinese, "compound", "compound (examples: phone book, three thousand)", ARGUMENT); /** * The "noun compound" (nn) grammatical relation. * Example: * (ROOT * (IP * (NP * (NP (NR 上海) (NR 浦东)) * (NP (NN 开发) * (CC 与) * (NN 法制) (NN 建设))) * (VP (VV 同步)))) * compound:nn (浦东, 上海) */ public static final GrammaticalRelation NOUN_COMPOUND = new GrammaticalRelation(Language.UniversalChinese, "compound:nn", "noun compound", COMPOUND, "^NP", tregexCompiler, "NP < (NN|NR|NT=target [$+ NN|NT $- NN|NP | $+ (NN|NT $+ NN|NP|NR)])", "NP < (NN|NR|NT=target !$+ PU|CC|DNP $++ NN|NT)", "NP < (NN|NR|NT $+ FW=target)", "NP < (NP=target !< NR !$+ PU|CC|DNP $++ (NP|PRN !< NR|QP))", // the following rule captures some exceptions from nmod:assmod "NP < (NP=target < NR $+ (NP [<<# NR | $+ NR|NN | $+ (__ <<# NR) | $+ /^[^N]/]))", "NP < (NP=target < NN !< NR $+ (NP < NN|NT))"); /** * The "name" grammatical relation. 
*/
  public static final GrammaticalRelation NAME =
    new GrammaticalRelation(Language.UniversalChinese,
      "name", "name",
      COMPOUND, "^NP", tregexCompiler,
      "NP < (NR=target $+ NR)");

  /**
   * The "coordinated verb compound" grammatical relation.
   *   (VCD (VV 颁布) (VV 实行))
   * comod(颁布-5, 实行-6)
   */
  public static final GrammaticalRelation VERB_COMPOUND =
    new GrammaticalRelation(Language.UniversalChinese,
      "compound:vc", "coordinated verb compound",
      COMPOUND, "VCD|VSB", tregexCompiler,
      "VCD < ( VV|VA $+ VV|VA=target)",
      "VSB < ( VV|VA=target $+ VV|VA)");

  /**
   * The "conjunct" (conj) grammatical relation.
   *

* *

   * Input:
   *   (ROOT
   *     (IP
   *       (NP
   *         (NP (NR 上海) (NR 浦东))
   *         (NP (NN 开发)
   *           (CC 与)
   *           (NN 法制) (NN 建设)))
   *       (VP (VV 同步))))
   *
   *   The development of Shanghai 's Pudong is in step with the establishment
   *   of its legal system.
   * Output:
   *   conj(建设, 开发) [should be reversed]
   * 
* * * TODO(pliang): make first item the head and the subsequent ones modifiers. */ public static final GrammaticalRelation CONJUNCT = new GrammaticalRelation(Language.UniversalChinese, "conj", "conjunct", DEPENDENT, "FRAG|INC|IP|VP|NP|ADJP|PP|ADVP|UCP", tregexCompiler, "NP|ADJP|PP|ADVP|UCP < (!PU|CC=target $+ CC)", // Split the first rule to the second rule to avoid the duplication: // ccomp(前来-12, 投资-13) // conj(前来-12, 投资-13) // // (IP // (VP // (VP (VV 前来)) // (VP // (VCD (VV 投资) (VV 办厂))) // (CC 和) // (VP (VV 洽谈) // (NP (NN 生意)))))) "VP < (!PU|CC=target !$- VP $+ CC)", // TODO: this following line has to be fixed. // I think for now it just doesn't match anything. "VP|NP|ADJP|PP|ADVP|UCP < ( __=target $+ PU $+ CC)", //"VP|NP|ADJP|PP|ADVP|UCP < ( __=target $+ (PU < 、) )", // Consider changing the rule ABOVE to these rules. "VP < ( /^V/=target $+ ((PU < 、) $+ /^V/))", "NP < ( /^N/=target $+ ((PU < 、) $+ /^N/))", "ADJP < ( JJ|ADJP=target $+ ((PU < 、) $+ JJ|ADJP))", "PP < ( /^P/=target $+ ((PU < 、) $+ /^P/))", //"ADVP < ( /^AD/=target $+ ((PU < 、) $+ /^AD/))", "ADVP < ( /^AD/ $+ ((PU < 、) $+ /^AD/=target))", "UCP < ( !PU|CC=target $+ (PU < 、) )", // This is for the 'conj's separated by commas. // For now this creates too much duplicates with 'ccomp'. // Need to look at more examples. "PP < (PP $+ PP=target )", "NP <( NP=target $+ ((PU < 、) $+ NP) )", "NP <( NN|NR|NT|PN=target $+ ((PU < ,|、) $+ NN|NR|NT|PN) )", "VP < (CC $+ VV=target)", // Original version of this did not have the outer layer of // the FRAG|INC|IP|VP. This caused a bug where the basic // dependencies could have cycles. "FRAG|INC|IP|VP < (VP < VV|VC|VRD|VCD|VE|VA < NP|QP|LCP $ IP|VP|VRD|VCD|VE|VC|VA=target) ", // splitting the following into two rules for accuracy // "IP|VP < ( IP|VP < NP|QP|LCP $ IP|VP=target )", "IP|VP < ( IP < NP|QP|LCP $ IP=target )", "IP|VP < ( VP $ VP=target )"); /** * The "coordination" grammatical relation. 
* A coordination is the relation between * an element and a conjunction.

* *

   * Input:
   *   (ROOT
   *     (IP
   *       (NP
   *         (NP (NR 上海) (NR 浦东))
   *         (NP (NN 开发)
   *           (CC 与)
   *           (NN 法制) (NN 建设)))
   *       (VP (VV 同步))))
   * Output:
   *   cc(建设, 与) [should be cc(开发, 与)]
   * 
* TODO(pliang): by convention, the first item in the coordination should be
   * chosen, but currently, it's the head, which happens to be the last.
   */
  public static final GrammaticalRelation COORDINATION =
    new GrammaticalRelation(Language.UniversalChinese,
      "cc", "coordination",
      DEPENDENT, "VP|NP|ADJP|PP|ADVP|UCP|IP|QP", tregexCompiler,
      "VP|NP|ADJP|PP|ADVP|UCP|IP|QP < (CC=target)");

  /**
   * The "case" grammatical relation.
   * This covers prepositions, localizers, and associative markers.
   *

*

   * 
   * Input:
   *   (PP (P 根据)
   *       (NP
   *         (DNP
   *           (NP
   *             (NP (NN 国家))
   *             (CC 和)
   *             (NP (NR 上海市)))
   *           (DEG 的))
   *         (ADJP (JJ 有关))
   *         (NP (NN 规定))))
   * Output (formerly reverse(pobj)):
   *   case(规定-19, 根据-13)
   *
   * Input:
   *   (LCP
   *       (NP (NT 近年))
   *       (LC 来))
   * Output (formerly reverse(lobj)):
   *   case(近年-3, 来-4)
   *
   * Input:
   *   (NP (DNP
   *         (NP (NP (NR 浦东))
   *         (NP (NN 开发)))
   *         (DEG 的))
   *       (ADJP (JJ 有序))
   *       (NP (NN 进行)))
   * Output (formerly reverse(assm)):
   *   case(开发-31, 的-32)
   *
   * Input:
   *   (PP (P 在)
   *     (LCP
   *       (NP
   *         (DP (DT 这)
   *             (CLP (M 片)))
   *         (NP (NN 热土)))
   *       (LC 上)))
   * Output (formerly reverse(plmod)):
   *   case(热土, 在)
   * 
   * 
*/ public static final GrammaticalRelation CASE = new GrammaticalRelation(Language.UniversalChinese, "case", "case marking (examples: Chair[head] 's[modifier], 根据/according[modifier] ... 规定/rule[head]; 近年/this year[head] 来[modifier])", DEPENDENT, "^PP|^LCP|^DNP", tregexCompiler, "/^PP/ < P=target", "/^LCP/ < LC=target", "/^DNP/ < DEG=target", "PP < ( P=target $++ LCP )"); /** * The "associative modifier" (nmod:assmod) grammatical relation (Chinese-specific). * See "case" for example. */ public static final GrammaticalRelation ASSOCIATIVE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod:assmod", "associative modifier (examples: 上海市/Shanghai[modifier] 的 规定/law[head])", NOUN_MODIFIER, "NP|QP|DNP", tregexCompiler, "NP|QP < ( DNP =target < (NP !< NT) $++ NP|QP ) ", "NP|DNP < (/^NP(-PN)?$/=target < NR $+ (NP !<<# NR !$+ NR|NN !$+ (__ <<# NR) !$+ /^[^N]/) !$- NP|NN)", // the following rule is merged into compound:nn //"NP < (NR=target $+ NN)", "NP < (NP=target !< NR !$+ PU|CC $++ (NP|PRN < QP))"); /** * The "nominal topic" (nmod:topic) grammatical relation (Chinese-specific). * Example: * * Input: * (IP (NP-TPC (NP-APP (ADJP (JJ 现任)) * (NP (NN 总统))) * (NP-PN (NR 米洛舍维奇))) * (NP-TMP (NT 2001年)) * (NP-SBJ (NN 总统) * (NN 任期)) * (VP (VV 到期))) * Output: * nmod:topic(到期, 米洛舍维奇) * */ public static final GrammaticalRelation NOMINAL_TOPIC_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod:topic", "nominal topic", NOUN_MODIFIER, "IP", tregexCompiler, "IP < (/^NP.*(-TPC)?/=target $++ (NP $+ VP) !< NT)"); //////////////////////////////////////////////////////////// // Other stuff: pliang: not sure exactly where they should go. //////////////////////////////////////////////////////////// /* * The "prepositional localizer modifier" grammatical relation. 
* (PP (P 在) * (LCP * (NP * (DP (DT 这) * (CLP (M 片))) * (NP (NN 热土))) * (LC 上))) * plmod(在-25, 上-29) */ /* * pengqi Jul 2016: This shouldn't exist in UD and is replaced by case * public static final GrammaticalRelation PREPOSITIONAL_LOCALIZER_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "plmod", "prepositional localizer modifier", MODIFIER, "PP", tregexCompiler, "PP < ( P $++ LCP=target )"); */ /** * The "adjectival complement" grammatical relation. * Example: */ // deleted by pichuan: no real matches /* public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "acomp", "adjectival complement", COMPLEMENT, "VP", tregexCompiler, new String[]{ "VP < (ADJP=target !$-- NP)" }); */ /** * The "localizer complement" grammatical relation. * (VP (VV 占) * (LCP * (QP (CD 九成)) * (LC 以上))) * (PU ,) * (vp (VV 达) * (QP (CD 四百三十八点八亿) * (CLP (M 美元)))) * loc (占-11, 以上-13) */ public static final GrammaticalRelation LOCALIZER_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "advmod:loc", "localizer complement", ADVERBIAL_MODIFIER, "VP|IP", tregexCompiler, "VP|IP < (LCP=target !< IP) "); public static final GrammaticalRelation CLAUSAL_LOCALIZER_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "advcl:loc", "localizer complement", ADV_CLAUSAL_MODIFIER, "VP|IP", tregexCompiler, "VP|IP < (LCP=target < IP) "); /** * The "resultative complement" grammatical relation. */ public static final GrammaticalRelation RESULTATIVE_COMPLEMENT = new GrammaticalRelation(Language.UniversalChinese, "advmod:rcomp", "result verb", ADVERBIAL_MODIFIER, "VRD", tregexCompiler, "VRD < ( /V*/ $+ /V*/=target )"); /** * The "ba" grammatical relation. */ public static final GrammaticalRelation BA = new GrammaticalRelation(Language.UniversalChinese, "aux:ba", "ba", AUX_MODIFIER, "VP|IP", tregexCompiler, "VP|IP < BA=target "); /** * The "classifier marker" grammatical relation. *

* *

   * Input:
   *   ((QP (CD 七十一)
   *        (CLP (M 件)))
   *    (NP (NN 法规性) (NN 文件)))
   * Output:
   *   mark:clf(七十一, 件)
   * 
* */ public static final GrammaticalRelation CLASSIFIER_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "mark:clf", "classifier marker", MARK, "QP|DP", tregexCompiler, "QP < M=target", "QP < CLP=target", "DP < ( DT $+ CLP=target )"); /** * The "possession modifier" grammatical relation. */ // Fri Feb 20 15:40:13 2009 (pichuan) // I think this "poss" relation is just WRONG. // DEC is a complementizer or a nominalizer, // this rule probably originally want to capture "DEG". // But it seems like it's covered by "assm" (associative marker). /* public static final GrammaticalRelation POSSESSION_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "poss", "possession modifier", MODIFIER, "NP", tregexCompiler, new String[]{ "NP < ( PN=target $+ DEC $+ NP )" }); */ /** * The "possessive marker" grammatical relation. */ // Similar to the comments to "poss", // I think this relation is wrong and will not appear. /* public static final GrammaticalRelation POSSESSIVE_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "possm", "possessive marker", MODIFIER, "NP", tregexCompiler, new String[]{ "NP < ( PN $+ DEC=target ) " }); */ /** * The "prepositional modifier" grammatical relation. *(IP * (PP (P 对) * (NP (PN 此))) * (PU ,) * (NP (NR 浦东)) * (VP * (VP * (ADVP (AD 不)) * (VP (VC 是) * (VP * (DVP * (VP (VA 简单)) * (DEV 的)) * (VP (VV 采取) * nmod (采取-9, 此-1) */ public static final GrammaticalRelation PREPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.UniversalChinese, "nmod:prep", "prepositional modifier", NOUN_MODIFIER, "^NP|VP|IP", tregexCompiler, "/^NP/ < /^PP/=target", "VP < /^PP/=target", "IP < /^PP/=target "); /** * The "participial modifier" (prtmod) grammatical relation. */ public static final GrammaticalRelation PART_VERB = new GrammaticalRelation(Language.UniversalChinese, "aux:prtmod", "particle verb", AUX_MODIFIER, "VP|IP", tregexCompiler, "VP|IP < ( MSP=target )"); /** * The "etc" grammatical relation. 
* (NP * (NP (NN 经济) (PU 、) (NN 贸易) (PU 、) (NN 建设) (PU 、) (NN 规划) (PU 、) (NN 科技) (PU 、) (NN 文教) (ETC 等)) * (NP (NN 领域))) * etc (办法-70, 等-71) */ public static final GrammaticalRelation ETC = new GrammaticalRelation(Language.UniversalChinese, "etc", "ETC", MODIFIER, "^NP", tregexCompiler, "/^NP/ < (NN|NR . ETC=target)"); /** * The "xsubj" grammatical relation, replaced with "nsubj:xsubj". *(IP * (NP (PN 有些)) * (VP * (VP * (ADVP (AD 还)) * (ADVP (AD 只)) * (VP (VC 是) * (NP * (ADJP (JJ 暂行)) * (NP (NN 规定))))) * (PU ,) * (VP (VV 有待) * (IP * (VP * (PP (P 在) * (LCP * (NP (NN 实践)) * (LC 中))) * (ADVP (AD 逐步)) * (VP (VV 完善)))))))))) * nsubj (完善-26, 规定-14) */ public static final GrammaticalRelation CONTROLLED_SUBJECT = new GrammaticalRelation(Language.UniversalChinese, "nsubj:xsubj", "controlled subject", NOMINAL_SUBJECT, "VP", tregexCompiler, "VP !< NP < VP > (IP !$- NP !< NP !>> (VP < VC ) >+(VP) (VP $-- NP=target))"); /** * The "discourse" (discourse) grammatical relation. */ public static final GrammaticalRelation DISCOURSE = new GrammaticalRelation(Language.UniversalChinese, "discourse", "discourse", ARGUMENT, "CP", tregexCompiler, "CP < SP=target"); // Universal GrammaticalRelations private static final GrammaticalRelation chineseOnly = null; // Place-holder: put this after a relation to mark it as Chinese-only private static final GrammaticalRelation[] rawValues = { DEPENDENT, ARGUMENT, SUBJECT, NOMINAL_SUBJECT, NOMINAL_PASSIVE_SUBJECT, CLAUSAL_SUBJECT, //CLAUSAL_PASSIVE_SUBJECT, // Exists in Chinese? COMPLEMENT, OBJECT, DIRECT_OBJECT, INDIRECT_OBJECT, CLAUSAL_COMPLEMENT, XCLAUSAL_COMPLEMENT, // Exists in Chinese? 
MODIFIER, // Nominal heads, nominal dependents NUMERIC_MODIFIER, ORDINAL_MODIFIER, chineseOnly, APPOSITIONAL_MODIFIER, PARENTHETICAL_MODIFIER, chineseOnly, NOUN_MODIFIER, RANGE, chineseOnly, ASSOCIATIVE_MODIFIER, chineseOnly, TEMPORAL_MODIFIER, chineseOnly, POSSESSIVE_MODIFIER, NOMINAL_TOPIC_MODIFIER, chineseOnly, // Nominal heads, predicate dependents //NOMINALIZED_CLAUSE_MODIFIER, // Exists in Chinese? ADJECTIVAL_MODIFIER, DETERMINER, NEGATION_MODIFIER, CLAUSAL_MODIFIER, // Predicate heads ADVERBIAL_MODIFIER, DVPM_MODIFIER, chineseOnly, ADV_CLAUSAL_MODIFIER, CLAUSAL_LOCALIZER_COMPLEMENT, chineseOnly, // Special clausal dependents //VOCATIVE, //DISCOURSE, //EXPL, AUX_MODIFIER, MODAL_VERB, chineseOnly, ASPECT_MARKER, chineseOnly, AUX_PASSIVE_MODIFIER, COPULA, MARK, CLASSIFIER_MODIFIER, chineseOnly, PUNCTUATION, // Other COMPOUND, NOUN_COMPOUND, chineseOnly, VERB_COMPOUND, chineseOnly, NAME, CONJUNCT, COORDINATION, CASE, DISCOURSE, // Don't know what to do about these //PREPOSITIONAL_LOCALIZER_MODIFIER, chineseOnly, LOCALIZER_COMPLEMENT, chineseOnly, RESULTATIVE_COMPLEMENT, chineseOnly, BA, chineseOnly, PREPOSITIONAL_MODIFIER, chineseOnly, PART_VERB, chineseOnly, ETC, chineseOnly, CONTROLLED_SUBJECT, chineseOnly, }; private static final List values = new ArrayList<>(); // Cache frequently used views of the values list private static final List synchronizedValues = Collections.synchronizedList(values); public static final Set universalValues = new HashSet<>(); // Map from GrammaticalRelation short names to their corresponding // GrammaticalRelation objects public static final Map shortNameToGRel = new ConcurrentHashMap<>(); static { for (int i = 0; i < rawValues.length; i++) { GrammaticalRelation gr = rawValues[i]; if (gr == chineseOnly) continue; synchronizedValues.add(gr); if (i + 1 == rawValues.length || rawValues[i + 1] != chineseOnly) { universalValues.add(gr); } } valuesLock().lock(); try { for (GrammaticalRelation gr : 
UniversalChineseGrammaticalRelations.values()) { shortNameToGRel.put(gr.getShortName(), gr); } } finally { valuesLock().unlock(); } } /** * Prints out the Chinese grammatical relations hierarchy. * * @param args Args are ignored. */ public static void main(String[] args) { System.out.println(DEPENDENT.toPrettyString()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy