All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jflexcrf.Labeling Maven / Gradle / Ivy

/*
 Copyright (C) 2010 by
 * 
 * 	Cam-Tu Nguyen	[email protected] [email protected]
 *  Xuan-Hieu Phan  [email protected] 
 
 *  College of Technology, Vietnamese University, Hanoi
 * 
 * 	Graduate School of Information Sciences
 * 	Tohoku University
 *
 *  JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with  JVnTextPro-v.2.0); if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

package jflexcrf;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import jvntextpro.data.DataReader;
import jvntextpro.data.DataWriter;
import jvntextpro.data.Sentence;
import jvntextpro.data.TaggingData;

// TODO: Auto-generated Javadoc
/**
 * The Class Labeling.
 */
public class Labeling {
	
	//-----------------------------------------------
	// Member Variables
	//-----------------------------------------------
	/** The model dir. */
	private String modelDir = "";	
	
	/** The tagger maps. */
	public Maps taggerMaps = null;
	
	/** The tagger dict. */
	public Dictionary taggerDict = null;
	
	/** The tagger f gen. */
	private FeatureGen taggerFGen = null;
	
	/** The tagger vtb. */
	private Viterbi taggerVtb = null;
	
	/** The tagger model. */
	private Model taggerModel = null;
	
	/** The data tagger. */
	private TaggingData dataTagger = null;
	
	/** The data reader. */
	private DataReader dataReader = null;
	
	/** The data writer. */
	private DataWriter dataWriter = null;
	
	//-----------------------------------------------
	// Initilization
	//-----------------------------------------------
	/**
	 * Instantiates a new labeling.
	 *
	 * @param modelDir the model dir
	 * @param dataTagger the data tagger
	 * @param dataReader the data reader
	 * @param dataWriter the data writer
	 */
	public Labeling (String modelDir, TaggingData dataTagger, 
			DataReader dataReader, DataWriter dataWriter){
		init(modelDir);
		this.dataTagger = dataTagger;
		this.dataWriter = dataWriter;
		this.dataReader = dataReader;		
	}
	
	/**
	 * Inits the.
	 *
	 * @param modelDir the model dir
	 * @return true, if successful
	 */
	public boolean init(String modelDir) {
		this.modelDir = modelDir;
		
		Option taggerOpt = new Option(this.modelDir);
		if (!taggerOpt.readOptions()) {
			return false;
		}

		taggerMaps = new Maps();
		taggerDict = new Dictionary();
		taggerFGen = new FeatureGen(taggerMaps, taggerDict);
		taggerVtb = new Viterbi();

		taggerModel = new Model(taggerOpt, taggerMaps, taggerDict, taggerFGen,
				taggerVtb);
		if (!taggerModel.init()) {
			System.out.println("Couldn't load the model");
			System.out.println("Check the  and the  again");
			return false;
		}
		return true;
	}
	
	/**
	 * Sets the data reader.
	 *
	 * @param reader the new data reader
	 */
	public void setDataReader (DataReader reader){
		dataReader = reader;
	}
	
	/**
	 * Sets the data tagger.
	 *
	 * @param tagger the new data tagger
	 */
	public void setDataTagger(TaggingData tagger){
		dataTagger = tagger;
	}
	
	/**
	 * Sets the data writer.
	 *
	 * @param writer the new data writer
	 */
	public void setDataWriter(DataWriter writer){
		dataWriter = writer;
	}
	
	//---------------------------------------------------------
	// labeling methods
	//---------------------------------------------------------
	/**
	 * labeling observation sequences.
	 *
	 * @param data list of sequences with specified format which can be read by DataReader
	 * @return a list of sentences with tags annotated
	 */
	@SuppressWarnings("unchecked")
	public List seqLabeling(String data){
		List obsvSeqs = dataReader.readString(data);
		return labeling(obsvSeqs);
	}	
	
	/**
	 * labeling observation sequences.
	 *
	 * @param file the file
	 * @return  a list of sentences with tags annotated
	 */
	@SuppressWarnings("unchecked")
	public List seqLabeling(File file){
		List obsvSeqs = dataReader.readFile(file.getPath());
		return labeling(obsvSeqs);
	}
	
	/**
	 * labeling observation sequences.
	 *
	 * @param data the data
	 * @return string representing label sequences, the format is specified by writer
	 */
	@SuppressWarnings("unchecked")
	public String strLabeling(String data){
		List lblSeqs = seqLabeling(data);
		String ret = dataWriter.writeString(lblSeqs);
		return ret;
	}
	
	/**
	 * labeling observation sequences.
	 *
	 * @param file contains a list of observation sequence, this file has a format wich can be read by DataReader
	 * @return string representing label sequences, the format is specified by writer
	 */
	public String strLabeling(File file){
		List obsvSeqs = dataReader.readFile(file.getPath());
		List lblSeqs = labeling(obsvSeqs);
		String ret = dataWriter.writeString(lblSeqs);
		return ret;
	}
	
	/**
	 * Labeling.
	 *
	 * @param obsvSeqs the obsv seqs
	 * @return the list
	 */
	@SuppressWarnings("unchecked")
	private List labeling(List obsvSeqs){
		List labelSeqs = new ArrayList();
		
		for (int i = 0; i < obsvSeqs.size(); ++i){//ith sentence
			List sequence = new ArrayList();
			Sentence sentence = obsvSeqs.get(i);
			
			for (int j = 0; j < sentence.size(); ++j){//jth observation
				Observation obsv = new Observation();
				obsv.originalData = sentence.getWordAt(j);
				
				String [] strCps = dataTagger.getContext(sentence, j);
				
				ArrayList tempCpsInt = new ArrayList();

				for (int k = 0; k < strCps.length; k++) {
					Integer cpInt = (Integer) taggerMaps.cpStr2Int.get(strCps[k]);
					if (cpInt == null) {
						continue;
					}
					tempCpsInt.add(cpInt);
				}
				
				obsv.cps = new int[tempCpsInt.size()];
				for (int k = 0; k < tempCpsInt.size(); ++k){
					obsv.cps[k] = tempCpsInt.get(k).intValue();
				}
				sequence.add(obsv);
			}
			
			labelSeqs.add(sequence);
		}
		
		taggerModel.inferenceAll(labelSeqs);	
		
		//assign labels to list of sentences
		for (int i = 0; i < obsvSeqs.size(); ++i){
			Sentence sent = obsvSeqs.get(i);
			List seq = (List) labelSeqs.get(i);
			
			for (int j = 0; j < sent.size(); ++j){
				Observation obsrv = (Observation) seq.get(j);			
				String label = (String) taggerMaps.lbInt2Str.get(obsrv.modelLabel);
				
				sent.getTWordAt(j).setTag(label);
			}
		}
		
		return obsvSeqs;
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy