All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jmaxent.Classification Maven / Gradle / Ivy

Go to download

HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.

There is a newer version: 2.2.1
Show newest version
/*
 Copyright (C) 2010 by
 * 
 * 	Cam-Tu Nguyen 
 *  [email protected] or [email protected]
 *
 *  Xuan-Hieu Phan  
 *  [email protected] 
 *
 *  College of Technology, Vietnamese University, Hanoi
 * 	Graduate School of Information Sciences, Tohoku University
 *
 * JVnTextPro-v.2.0 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

package jmaxent;

import java.io.*;
import java.util.*;

/**
 * Loads a trained model from a model directory and assigns labels to
 * observations described by context predicates.
 *
 * <p>Construction reads the options from the model directory and immediately
 * calls {@link #init()}. Callers should check {@link #isInitialized()} before
 * classifying: when loading fails, the collaborators ({@link #data},
 * {@link #inference}, ...) may be {@code null} or only partially populated.
 *
 * <p>All {@code classify} methods reuse the shared {@link #intCps} buffer, so a
 * single instance must not be used from several threads at the same time.
 */
public class Classification {

    /** Prefix of the context predicate that carries the current word. */
    private static final String CUR_WORD_PREFIX = "w:0";

    /** Prefix of the context predicates that carry dictionary information. */
    private static final String DICT_PREFIX = "dict:0";

    /** Prefix that precedes the label name inside a dictionary predicate. */
    private static final String DICT_LABEL_PREFIX = "dict:0:";

    /** Marker: no matching context predicate id has been seen. */
    private static final int NO_CP = -1;

    /** Dictionary-label state: no dictionary predicate has been seen yet. */
    private static final int DICT_LABEL_UNSEEN = -2;

    /** Dictionary-label state: the label is unknown or more than one was seen. */
    private static final int DICT_LABEL_NONE = -1;

    /** Options read from the model directory. */
    public Option option = null;

    /** Context-predicate and label maps read from the model file. */
    public Data data = null;

    /** Dictionary read from the model file. */
    public Dictionary dict = null;

    /** Feature generator whose features are read from the model file. */
    public FeatureGen feagen = null;

    /** Inference engine used to label observations. */
    public Inference inference = null;

    /** Model object tying option, data, dictionary, features and inference together. */
    public Model model = null;

    /** {@code true} only after {@link #init()} has completed without error. */
    public boolean initialized = false;

    /** Reader over the model file; only non-null while {@link #init()} is running. */
    private BufferedReader finModel = null;

    /**
     * Scratch buffer holding the context-predicate ids of the observation that
     * is currently being classified. Never {@code null}.
     */
    List<Integer> intCps = new ArrayList<Integer>();

    /**
     * Reads the options found in {@code modelDir} and loads the model.
     *
     * <p>NOTE(review): this calls the public, overridable {@link #init()} from
     * the constructor; kept as is because subclasses or callers may rely on it.
     *
     * @param modelDir directory handed to {@link Option}
     */
    public Classification(String modelDir) {
        option = new Option(modelDir);
        option.readOptions();

        init();
    }

    /**
     * Tells whether the model was loaded successfully.
     *
     * @return {@code true} if the last {@link #init()} call completed without error
     */
    public boolean isInitialized() {
        return initialized;
    }

    /**
     * Loads the model from the model file.
     *
     * <p>The file is read in a fixed order: context-predicate maps, label maps,
     * dictionary, features. Afterwards the inference and model objects are
     * created and the model file is closed. {@link #initialized} becomes
     * {@code true} only when every step, including closing the file, succeeded;
     * failures are reported on standard output and leave it {@code false}.
     */
    public void init() {
        // Start from a clean state so a failed (re)load is never reported as success.
        initialized = false;
        intCps = new ArrayList<Integer>();

        try {
            // open model file
            finModel = option.openModelFile();
            if (finModel == null) {
                System.out.println("Couldn't open model file");
                return;
            }

            data = new Data(option);
            // read context predicate map
            data.readCpMaps(finModel);
            // read label map
            data.readLbMaps(finModel);

            dict = new Dictionary(option, data);
            // read dictionary
            dict.readDict(finModel);

            feagen = new FeatureGen(option, data, dict);
            // read features
            feagen.readFeatures(finModel);

            // create an inference object
            inference = new Inference();

            // create a model object
            // NOTE(review): the two null arguments are passed exactly as before;
            // their meaning is defined by Model's constructor, not visible here.
            model = new Model(option, data, dict, feagen, null, inference, null);
            model.initInference();

            // close model file
            finModel.close();
            finModel = null;

            initialized = true;
        } catch (IOException e) {
            System.out.println("Couldn't load the model, check the model file again");
            System.out.println(e.toString());
        } finally {
            // Release the reader when loading stopped before the regular close.
            if (finModel != null) {
                try {
                    finModel.close();
                } catch (IOException ignored) {
                    // The load failure was already reported; nothing more to do.
                }
                finModel = null;
            }
        }
    }

    /**
     * Classifies one observation given as a whitespace-separated string of
     * context predicates. Predicates that are not in the model's
     * context-predicate map are skipped.
     *
     * @param cps context predicates separated by spaces, tabs or line breaks
     * @return the label chosen by the inference engine, or an empty string if
     *         the chosen label id has no entry in the label map
     */
    public String classify(String cps) {
        intCps.clear();

        StringTokenizer tokens = new StringTokenizer(cps, " \t\r\n");
        while (tokens.hasMoreTokens()) {
            Integer cpInt = (Integer) data.cpStr2Int.get(tokens.nextToken());
            if (cpInt != null) {
                intCps.add(cpInt);
            }
        }

        Observation obsr = new Observation(intCps);

        // classify
        inference.classify(obsr);

        return labelOf(obsr);
    }

    /**
     * Classifies one observation given as an array of context predicates,
     * taking the current-word ({@code w:0}) and dictionary ({@code dict:0})
     * predicates into account.
     *
     * <p>Predicates unknown to the model are skipped. While scanning, the
     * dictionary label is tracked: it is a valid label id only when exactly one
     * known {@code dict:0} predicate was seen and its label name is in the label
     * map. Known dictionary predicates are then appended again to the
     * observation (three extra copies of the last one when the current word is
     * known and the dictionary label is valid, otherwise two extra copies of
     * each). If the current word is unknown but the dictionary label is valid,
     * that label is returned without running inference.
     *
     * @param cpArr the context predicates of the observation
     * @return the resulting label, or an empty string if the label id has no
     *         entry in the label map
     */
    public String classify(String[] cpArr) {
        intCps.clear();

        int curWordCp = NO_CP;
        int dictLabel = DICT_LABEL_UNSEEN;
        int dictCp = NO_CP;
        List<Integer> dictCps = new ArrayList<Integer>();

        for (String cpStr : cpArr) {
            Integer cpInt = (Integer) data.cpStr2Int.get(cpStr);
            if (cpInt == null) {
                continue; // predicate unknown to the model
            }
            intCps.add(cpInt);

            if (cpStr.startsWith(CUR_WORD_PREFIX)) {
                // current word
                curWordCp = cpInt;
            } else if (cpStr.startsWith(DICT_PREFIX)) {
                // current labels
                dictCp = cpInt;
                dictCps.add(cpInt);

                if (dictLabel == DICT_LABEL_UNSEEN) {
                    // First dictionary predicate: take its label if the model knows it.
                    // A predicate shorter than the label prefix carries no label name.
                    String label = cpStr.length() > DICT_LABEL_PREFIX.length()
                            ? cpStr.substring(DICT_LABEL_PREFIX.length())
                            : "";

                    if (data.lbStr2Int.containsKey(label)) {
                        dictLabel = (Integer) data.lbStr2Int.get(label);
                    } else {
                        dictLabel = DICT_LABEL_NONE;
                    }
                } else {
                    // A second dictionary predicate makes the label ambiguous.
                    dictLabel = DICT_LABEL_NONE;
                }
            }
        }

        // insert information about current cpid of w:0:
        if (curWordCp != NO_CP && dictLabel >= 0) { // in training data
            for (int i = 0; i < 3; ++i) {
                intCps.add(dictCp);
            }
        } else {
            for (Integer cp : dictCps) {
                intCps.add(cp);
                intCps.add(cp);
            }
        }

        // create observation and start inference
        Observation obsr = new Observation(intCps);
        obsr.curWordCp = curWordCp;
        obsr.dictLabel = dictLabel;

        if (curWordCp == NO_CP && dictLabel >= 0) {
            // not in training data and
            // there is only one corresponding label in dict
            obsr.modelLabel = obsr.dictLabel;
        } else {
            inference.classify(obsr);
        }

        return labelOf(obsr);
    }

    /**
     * Classifies a list of observations, one
     * {@link #classify(String)} call per element.
     *
     * @param data list whose elements are context-predicate strings; note that
     *        this parameter shadows the {@link #data} field inside the method
     * @return the labels, in the same order as the input
     */
    @SuppressWarnings("rawtypes") // raw signature kept for source compatibility with existing callers
    public List classify(List data) {
        List<String> labels = new ArrayList<String>(data.size());

        for (Object cps : data) {
            labels.add(classify((String) cps));
        }

        return labels;
    }

    /**
     * Maps the model label stored in an observation to its string form.
     *
     * @param obsr observation whose {@code modelLabel} has been set
     * @return the label string, or an empty string if the id is not in the label map
     */
    private String labelOf(Observation obsr) {
        String lbStr = (String) data.lbInt2Str.get(Integer.valueOf(obsr.modelLabel));
        return lbStr != null ? lbStr : "";
    }

} // end of class Classification





© 2015 - 2024 Weber Informatics LLC | Privacy Policy