examples.FullPipelineSpanish Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of anna Show documentation
Tools for Natural Language Analysis, Generation and Machine Learning
The newest version!
package examples;

import is2.data.SentenceData09;
import is2.io.CONLLWriter09;
import is2.lemmatizer.Lemmatizer;

import is2.parser.Parser;
import is2.tag.Tagger;
import is2.tools.Tool;

import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;

/**
 * Illustrates the application of the full pipeline to a Spanish sentence:
 * lemmatizer, morphologic tagger, part-of-speech tagger, and dependency parser.
 *
 * @author Bernd Bohnet, 13.09.2010
 */
public class FullPipelineSpanish {

	/**
	 * Shows how to analyse a sentence by calling the tools one after another.
	 * After each step the sentence container is printed; the final result is
	 * written to {@code example-out.txt}.
	 *
	 * @param args if exactly one argument is given it is used as the input sentence
	 *             and split on whitespace (e.g. {@code "This is another test ."});
	 *             for any other argument count a built-in Spanish sentence is used
	 * @throws IOException if one of the pipeline steps or the output writer
	 *                     reports an I/O failure
	 */
	public static void main(String[] args) throws IOException {

		// Create a data container for a sentence
		SentenceData09 sentence = new SentenceData09();

		if (args.length == 1) {
			// the sentence was passed as a single, quoted command line argument
			sentence.init(toForms(args[0]));
		} else {
			// No (or ambiguous) input: fall back to a default Spanish sentence.
			// The leading "" mirrors the empty entry that toForms() puts in front
			// of user supplied tokens.
			sentence.init(new String[] {"", "También", "estuve", "emocionado", "pero", "no", "pude",
					"imaginar", "mi", "vida", "sin", "la", "gente", "tan", "intima", "a", "mí", "."});
		}

		// lemmatizing

		System.out.println("\nReading the model of the lemmatizer");
		Tool lemmatizer = new Lemmatizer("models/lemma-spa.model");  // create a lemmatizer

		System.out.println("Applying the lemmatizer");
		lemmatizer.apply(sentence);

		System.out.print(sentence.toString());
		System.out.print("Lemmata: ");
		for (String lemma : sentence.plemmas) {
			System.out.print(lemma + " ");
		}
		System.out.println();

		// morphologic tagging

		System.out.println("\nReading the model of the morphologic tagger");
		// fully qualified: the simple name Tagger is already taken by is2.tag.Tagger
		is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model");

		System.out.println("\nApplying the morpholoigc tagger");
		morphTagger.apply(sentence);

		System.out.print(sentence.toString());
		System.out.print("Morph: ");
		for (String feature : sentence.pfeats) {
			System.out.print(feature + " ");
		}
		System.out.println();

		// part-of-speech tagging

		System.out.println("\nReading the model of the part-of-speech tagger");
		Tool tagger = new Tagger("models/tag-spa.model");

		System.out.println("\nApplying the part-of-speech tagger");
		tagger.apply(sentence);

		System.out.print(sentence.toString());
		System.out.print("Part-of-Speech tags: ");
		for (String pos : sentence.ppos) {
			System.out.print(pos + " ");
		}
		System.out.println();

		// parsing

		System.out.println("\nReading the model of the dependency parser");
		Tool parser = new Parser("models/prs-spa.model");

		System.out.println("\nApplying the parser");
		parser.apply(sentence);

		System.out.println(sentence.toString());

		// write the result to a file

		CONLLWriter09 writer = new CONLLWriter09("example-out.txt");
		try {
			writer.write(sentence, CONLLWriter09.NO_ROOT);
		} finally {
			// always finish the writer, even when write() fails
			// NOTE(review): finishWriting() presumably flushes/closes the output file - confirm
			writer.finishWriting();
		}
	}

	/**
	 * Splits a sentence on whitespace and puts an empty string in front of the tokens.
	 *
	 * @param text the raw sentence, tokens separated by whitespace
	 * @return an array whose element 0 is {@code ""} followed by the tokens in order
	 */
	private static String[] toForms(String text) {
		StringTokenizer tokenizer = new StringTokenizer(text);

		// Typed list: with a raw ArrayList, toArray(new String[0]) is typed as Object[]
		ArrayList<String> forms = new ArrayList<String>();

		// NOTE(review): the empty first entry is presumably the slot of the artificial
		// root token expected by the tools - confirm against SentenceData09
		forms.add("");
		while (tokenizer.hasMoreTokens()) {
			forms.add(tokenizer.nextToken());
		}
		return forms.toArray(new String[0]);
	}

}