All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.trees.DateTreeTransformer Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

The newest version!
package edu.stanford.nlp.trees;

import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;

/**
 * Flattens the following two structures:
 * 
* (NP (NP (NNP Month) (CD Day) ) * (, ,) * (NP (CD Year) )) *
* becomes *
* (NP (NNP Month) (CD Day) (, ,) (CD Year) ) *
* (NP (NP (NNP Month) ) * (NP (CD Year) )) *
* becomes *
* (NP (NNP Month) (CD Year)) * * @author John Bauer */ public class DateTreeTransformer implements TreeTransformer { static final String MONTH_REGEX = "January|February|March|April|May|June|July|August|September|October|November|December|Jan\\.|Feb\\.|Mar\\.|Apr\\.|Aug\\.|Sep\\.|Sept\\.|Oct\\.|Nov\\.|Dec\\."; static final TregexPattern tregexMonthYear = TregexPatternCompiler.defaultCompiler.compile("NP=root <1 (NP <: (NNP=month <: /" + MONTH_REGEX + "/)) <2 (NP=yearnp <: (CD=year <: __)) : =root <- =yearnp"); static final TregexPattern tregexMonthDayYear = TregexPatternCompiler.defaultCompiler.compile("NP=root <1 (NP=monthdayroot <1 (NNP=month <: /" + MONTH_REGEX +"/) <2 (CD=day <: __)) <2 (/^,$/=comma <: /^,$/) <3 (NP=yearroot <: (CD=year <: __)) : (=root <- =yearroot) : (=monthdayroot <- =day)"); public Tree transformTree(Tree t) { TregexMatcher matcher = tregexMonthYear.matcher(t); while (matcher.find()) { Tree root = matcher.getNode("root"); Tree month = matcher.getNode("month"); Tree year = matcher.getNode("year"); Tree[] children = new Tree[] {month, year}; root.setChildren(children); matcher = tregexMonthYear.matcher(t); } matcher = tregexMonthDayYear.matcher(t); while (matcher.find()) { Tree root = matcher.getNode("root"); Tree month = matcher.getNode("month"); Tree day = matcher.getNode("day"); Tree comma = matcher.getNode("comma"); Tree year = matcher.getNode("year"); Tree[] children = new Tree[] {month, day, comma, year}; root.setChildren(children); matcher = tregexMonthDayYear.matcher(t); } return t; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy