edu.berkeley.nlp.ling.BikelChineseHeadFinder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.ling;
import java.util.HashMap;
import edu.berkeley.nlp.treebank.ChineseTreebankLanguagePack;
import edu.berkeley.nlp.treebank.TreebankLanguagePack;
/**
* A headfinder implementing Dan Bikel's head rules.
* March 2005: Updated to match the head-finding rules found in
* Bikel's thesis (2004).
*
* @author Galen Andrew
* @author Christopher Manning.
*/
public class BikelChineseHeadFinder extends AbstractCollinsHeadFinder {

  /** Version stamp used by Java serialization should instances of this class be serialized. */
  private static final long serialVersionUID = 1L;

  /**
   * Creates a head finder backed by a newly constructed
   * {@link ChineseTreebankLanguagePack}.
   */
  public BikelChineseHeadFinder() {
    this(new ChineseTreebankLanguagePack());
  }

  /**
   * Creates a head finder for the given language pack and populates the
   * inherited {@code nonTerminalInfo} table and {@code defaultRule}.
   *
   * <p>Each table entry maps a parent category label to an array of rules.
   * NOTE(review): the rules are interpreted by {@code AbstractCollinsHeadFinder},
   * which is not visible here; presumably the first element of a rule is the
   * search direction ("left"/"right") and the remaining elements are candidate
   * head categories, with later rules acting as fallbacks -- confirm against
   * the superclass.
   *
   * @param tlp the treebank language pack passed to the superclass constructor
   */
  public BikelChineseHeadFinder(TreebankLanguagePack tlp) {
    super(tlp);

    // Parameterized instead of raw: every key below is a String category label
    // and every value is a String[][] rule list.
    nonTerminalInfo = new HashMap<String, String[][]>();

    // these are first-cut rules

    // Rule used for categories that have no entry in the table.
    defaultRule = new String[]{"right"};

    // ROOT is not always unary for chinese -- PAIR is a special notation
    // that the Irish people use for non-unary ones....
    nonTerminalInfo.put("ROOT", new String[][]{{"left", "IP"}});
    nonTerminalInfo.put("PAIR", new String[][]{{"left", "IP"}});

    // Major syntactic categories
    nonTerminalInfo.put("ADJP", new String[][]{{"right", "ADJP", "JJ"}, {"right", "AD", "NN", "CS"}});
    nonTerminalInfo.put("ADVP", new String[][]{{"right", "ADVP", "AD"}});
    nonTerminalInfo.put("CLP", new String[][]{{"right", "CLP", "M"}});
    nonTerminalInfo.put("CP", new String[][]{{"right", "DEC", "SP"}, {"left", "ADVP", "CS"}, {"right", "CP", "IP"}});
    nonTerminalInfo.put("DNP", new String[][]{{"right", "DNP", "DEG"}, {"right", "DEC"}});
    nonTerminalInfo.put("DP", new String[][]{{"left", "DP", "DT"}});
    nonTerminalInfo.put("DVP", new String[][]{{"right", "DVP", "DEV"}});
    nonTerminalInfo.put("FRAG", new String[][]{{"right", "VV", "NR", "NN"}});
    nonTerminalInfo.put("INTJ", new String[][]{{"right", "INTJ", "IJ"}});
    nonTerminalInfo.put("IP", new String[][]{{"right", "IP", "VP"}, {"right", "VV"}});
    nonTerminalInfo.put("LCP", new String[][]{{"right", "LCP", "LC"}});
    nonTerminalInfo.put("LST", new String[][]{{"left", "LST", "CD", "OD"}});
    nonTerminalInfo.put("NP", new String[][]{{"right", "NP", "NN", "NT", "NR", "QP"}});
    nonTerminalInfo.put("PP", new String[][]{{"left", "PP", "P"}});
    nonTerminalInfo.put("PRN", new String[][]{{"right", "NP", "IP", "VP", "NT", "NR", "NN"}});
    nonTerminalInfo.put("QP", new String[][]{{"right", "QP", "CLP", "CD", "OD"}});
    // UCP lists no candidate categories, only a direction.
    nonTerminalInfo.put("UCP", new String[][]{{"right"}});
    nonTerminalInfo.put("VP", new String[][]{{"left", "VP", "VA", "VC", "VE", "VV", "BA", "LB", "VCD", "VSB", "VRD", "VNV", "VCP"}});

    // Verb compounds: each lists its own label first, then the verb POS tags.
    nonTerminalInfo.put("VCD", new String[][]{{"right", "VCD", "VV", "VA", "VC", "VE"}});
    nonTerminalInfo.put("VCP", new String[][]{{"right", "VCP", "VV", "VA", "VC", "VE"}});
    nonTerminalInfo.put("VRD", new String[][]{{"right", "VRD", "VV", "VA", "VC", "VE"}});
    nonTerminalInfo.put("VSB", new String[][]{{"right", "VSB", "VV", "VA", "VC", "VE"}});
    nonTerminalInfo.put("VNV", new String[][]{{"right", "VNV", "VV", "VA", "VC", "VE"}});
    // Unlike the sibling entries above, this rule names VNV rather than its own
    // label. It is deliberately left as-is: VNV typo for VPT? None of either in ctb4.
    nonTerminalInfo.put("VPT", new String[][]{{"right", "VNV", "VV", "VA", "VC", "VE"}});

    nonTerminalInfo.put("WHNP", new String[][]{{"right", "WHNP", "NP", "NN", "NT", "NR", "QP"}});
    nonTerminalInfo.put("WHPP", new String[][]{{"left", "WHPP", "PP", "P"}});

    // some POS tags apparently sit where phrases are supposed to be
    nonTerminalInfo.put("CD", new String[][]{{"right", "CD"}});
    nonTerminalInfo.put("NN", new String[][]{{"right", "NN"}});
    nonTerminalInfo.put("NR", new String[][]{{"right", "NR"}});

    // Direction-only rules for the verb POS tags VV, VA, VC and VE.
    // NOTE(review): the opening of the original comment is missing from this
    // copy; it ended with "... parsing", so these entries presumably exist to
    // support some form of parsing beneath the word level -- confirm against
    // the upstream source. It shouldn't affect anything else because heads of
    // preterminals are not generally queried - GMA
    nonTerminalInfo.put("VV", new String[][]{{"left"}});
    nonTerminalInfo.put("VA", new String[][]{{"left"}});
    nonTerminalInfo.put("VC", new String[][]{{"left"}});
    nonTerminalInfo.put("VE", new String[][]{{"left"}});
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy