All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.unihd.dbs.uima.consumer.tempeval2writer.Tempeval2Writer Maven / Gradle / Ivy

Go to download

HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.

There is a newer version: 2.2.1
Show newest version
/*
 * Tempeval2Writer.java
 * 
 * Copyright (c) 2011, Database Research Group, Institute of Computer Science, University of Heidelberg. 
 * All rights reserved. This program and the accompanying materials 
 * are made available under the terms of the GNU General Public License.
 * 
 * author: Jannik Strötgen
 * email:  [email protected]
 * 
 * TempEval2 Writer create files according to the TempEval-2 style.
 * For details, see http://dbs.ifi.uni-heidelberg.de/heideltime
 */

package de.unihd.dbs.uima.consumer.tempeval2writer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;

import de.unihd.dbs.uima.types.heideltime.Timex3;

/**
 * 
 * @author jstroetgen
 *
 */
public class Tempeval2Writer extends CasConsumer_ImplBase {
	public static final String PARAM_OUTPUTDIR = "OutputDir";

	private File mOutputDir;
	
	/**
	 * initialize
	 */
	public void initialize() throws ResourceInitializationException {
		mOutputDir = new File((String) getConfigParameterValue(PARAM_OUTPUTDIR));
		if (!mOutputDir.exists()) {
			mOutputDir.mkdirs();
		}
	}

	/**
	 * process
	 */
	public void processCas(CAS aCAS) throws ResourceProcessException {

		JCas jcas;
		try {
			jcas = aCAS.getJCas();
		} catch (CASException e) {
			throw new ResourceProcessException(e);
		}

		printTimexAnnotations(jcas);
	}
	
	
	public void printTimexAnnotations(JCas jcas) {
		File outExtents = new File(mOutputDir, "timex-extents.tab");
		File outAttributes = new File(mOutputDir, "timex-attributes.tab");
		
		// get timex index
		FSIndex indexTimex   = jcas.getAnnotationIndex(Timex3.type);
		FSIterator iterTimex = indexTimex.iterator();
		
		while (iterTimex.hasNext()){
			Timex3 t = (Timex3) iterTimex.next();
			if (!((t.getType().toString().equals("de.unihd.dbs.uima.heidopp.types.tempeval2.GoldTimex3")))){
				
				// output extents
				String toPrintExtents    = "";
				String[] allTokList = t.getAllTokIds().split("<-->");
				for (int i=1; i < allTokList.length; i++){
					toPrintExtents    = toPrintExtents+t.getFilename()+"\t"+t.getSentId()+"\t"+allTokList[i]+
					"\ttimex3\t"+t.getTimexId()+"\t1\n";					
				}
				
				try {
					BufferedWriter bf = new BufferedWriter(new FileWriter(outExtents, true));
					bf.write(toPrintExtents);
					bf.close();
				} catch (IOException e) {
					e.printStackTrace();
				}

				// output attributes
				String toPrintAttributes = t.getFilename()+"\t"+t.getSentId()+"\t"+t.getFirstTokId()+
											"\ttimex3\t"+t.getTimexId()+"\t1\ttype\t"+t.getTimexType()+"\n";
				toPrintAttributes += t.getFilename()+"\t"+t.getSentId()+"\t"+t.getFirstTokId()+
											"\ttimex3\t"+t.getTimexId()+"\t1\tvalue\t"+t.getTimexValue()+"\n";
	
				try {
					BufferedWriter bf = new BufferedWriter(new FileWriter(outAttributes, true));
					bf.write(toPrintAttributes);
					bf.close();
				} catch (IOException e1) {
					e1.printStackTrace();
				}
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy