edu.stanford.nlp.trees.international.french.FrenchHeadFinder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools that take raw English-language text as input and can give the base forms of words, their parts of speech, and whether they are names of companies, people, etc.; normalize dates, times, and numeric quantities; mark up the structure of sentences in terms of phrases and word dependencies; and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
package edu.stanford.nlp.trees.international.french;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.Generics;
/**
 * Head finder for French constituency trees. The constructor fills the inherited
 * {@code nonTerminalInfo} table with one list of head rules per phrasal category.
 *
 * <p>Each rule is a {@code String[]} whose first element is a search-direction keyword
 * ({@code "left"}, {@code "right"}, {@code "leftdis"}, {@code "rightdis"}) followed by the
 * candidate child categories; see {@link AbstractCollinsHeadFinder} for how the rules
 * are interpreted.
 *
 * <p>TODO wsg2010: Compare these head finding rules to those found in Abhishek Arun's
 * master's thesis.
 *
 * @author mcdm
 */
public class FrenchHeadFinder extends AbstractCollinsHeadFinder {

  private static final long serialVersionUID = 8747319554557223422L;

  /**
   * Creates a head finder that uses a freshly constructed
   * {@link FrenchTreebankLanguagePack}.
   */
  public FrenchHeadFinder() {
    this(new FrenchTreebankLanguagePack());
  }

  /**
   * Creates a head finder for the given language pack and installs the French head rules.
   *
   * @param tlp the language pack; it is passed to the superclass and its start symbol
   *            is registered with the same rules as {@code "SENT"}
   */
  public FrenchHeadFinder(FrenchTreebankLanguagePack tlp) {
    super(tlp);

    // French POS:
    //   A (adjective), ADV (adverb), C (conjunction and subordinating conjunction), CL (clitics),
    //   CS (subordinating conjunction) but occurs only once!,
    //   D (determiner), ET (foreign word), I (interjection), N (noun),
    //   P (preposition), PREF (prefix), PRO (strong pronoun -- very confusing), V (verb), PUNC (punctuation)

    nonTerminalInfo = Generics.newHashMap();

    // "sentence": the language pack's start symbol and SENT share the same rules
    nonTerminalInfo.put(tlp.startSymbol(), new String[][]{{"left", "VN", "NP"}, {"left"}});
    nonTerminalInfo.put("SENT", new String[][]{{"left", "VN", "NP"}, {"left"}});

    // adjectival phrases
    nonTerminalInfo.put("AP", new String[][]{{"left", "A", "V"}, {"rightdis", "N", "ET"}, {"left"}});

    // adverbial phrases
    nonTerminalInfo.put("AdP", new String[][]{{"right", "ADV"}, {"left", "N"}, {"right"}});

    // coordinated phrases
    nonTerminalInfo.put("COORD", new String[][]{{"leftdis", "C", "CC", "ADV", "PP", "P"}, {"left"}});

    // noun phrases
    nonTerminalInfo.put("NP", new String[][]{{"rightdis", "N", "PRO", "NP", "A"}, {"right", "ET"}, {"right"}});

    // prepositional phrases
    nonTerminalInfo.put("PP", new String[][]{{"left", "P", "PRO", "A", "NP", "V", "PP", "ADV"}, {"left"}});

    // verbal nucleus
    nonTerminalInfo.put("VN", new String[][]{{"right", "V", "VN"}, {"right"}});

    // infinitive clauses
    nonTerminalInfo.put("VPinf", new String[][]{{"left", "VN", "V"}, {"left"}});

    // nonfinite clauses
    nonTerminalInfo.put("VPpart", new String[][]{{"left", "VN", "V", "AP", "A", "AdP", "VPpart"}, {"left"}});

    // relative clauses
    nonTerminalInfo.put("Srel", new String[][]{{"left", "NP", "PRO", "PP", "C", "ADV"}});

    // subordinate clauses
    nonTerminalInfo.put("Ssub", new String[][]{{"left", "C", "PC", "ADV", "P", "PP"}, {"left"}});

    // parenthetical clauses
    nonTerminalInfo.put("Sint", new String[][]{{"left", "VN", "V", "NP", "Sint", "Ssub", "PP"}, {"left"}});

    // adverbs
    nonTerminalInfo.put("ADV", new String[][] {{"left", "ADV", "PP", "P"}});

    // compound categories: start with MW: D, A, C, N, ADV, V, P, PRO, CL
    nonTerminalInfo.put("MWD", new String[][] {{"left", "D"}, {"left"}});
    nonTerminalInfo.put("MWA", new String[][] {{"left", "P"}, {"left", "N"}, {"right", "A"}, {"right"}});
    nonTerminalInfo.put("MWC", new String[][] {{"left", "C", "CS"}, {"left"}});
    nonTerminalInfo.put("MWN", new String[][] {{"right", "N", "ET"}, {"right"}});
    nonTerminalInfo.put("MWV", new String[][] {{"left", "V"}, {"left"}});
    nonTerminalInfo.put("MWP", new String[][] {{"left", "P", "ADV", "PRO"}, {"left"}});
    nonTerminalInfo.put("MWPRO", new String[][] {{"left", "PRO", "CL", "N", "A"}, {"left"}});
    nonTerminalInfo.put("MWCL", new String[][] {{"left", "CL"}, {"right"}});
    nonTerminalInfo.put("MWADV", new String[][] {{"left", "P", "ADV"}, {"left"}});
    nonTerminalInfo.put("MWI", new String[][] {{"left", "N", "ADV", "P"}, {"left"}});
    nonTerminalInfo.put("MWET", new String[][] {{"left", "ET", "N"}, {"left"}});

    //TODO: wsg2011: For phrasal nodes that lacked a label.
    nonTerminalInfo.put(FrenchXMLTreeReader.MISSING_PHRASAL, new String[][]{{"left"}});
  }

  /**
   * Go through trees and determine their heads and print them.
   * Just for debugging.
   *
   * <p>Usage:
   * <pre>
   * java edu.stanford.nlp.trees.international.french.FrenchHeadFinder treebankFilePath
   * </pre>
   *
   * <p>If no path is given, a usage message is written to standard error and the
   * JVM exits with status 1. Arguments after the first are ignored.
   *
   * @param args The treebankFilePath
   */
  public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
      System.err.println("Usage: java " + FrenchHeadFinder.class.getName() + " treebankFilePath");
      System.exit(1);
      return;
    }

    Treebank treebank = new DiskTreebank();
    CategoryWordTag.suppressTerminalDetails = true;
    treebank.loadPath(args[0]);

    final HeadFinder chf = new FrenchHeadFinder();
    treebank.apply(pt -> {
      pt.percolateHeads(chf);
      pt.pennPrint();
      System.out.println();
    });
  }

}