All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jflexcrf.FeatureGen Maven / Gradle / Ivy

Go to download

HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.

There is a newer version: 2.2.1
Show newest version
/*
 Copyright (C) 2010 by
 * 
 * 	Cam-Tu Nguyen	[email protected] [email protected]
 *  Xuan-Hieu Phan  [email protected] 
 
 *  College of Technology, Vietnamese University, Hanoi
 * 
 * 	Graduate School of Information Sciences
 * 	Tohoku University
 *
 *  JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with  JVnTextPro-v.2.0); if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

package jflexcrf;

import java.io.*;
import java.util.*;

// TODO: Auto-generated Javadoc
/**
 * The Class FeatureGen.
 */
public class FeatureGen {
    
    /** The features. */
    List features = null;	// list of features
    
    /** The fmap. */
    Map fmap = null;		// feature map
    
    /** The maps. */
    Maps maps = null;		// context predicate and label maps
    
    /** The dict. */
    Dictionary dict = null;	// dictionary
        
    /**
     * Instantiates a new feature gen.
     *
     * @param maps the maps
     * @param dict the dict
     */
    public FeatureGen(Maps maps, Dictionary dict) {
	this.maps = maps;
	this.dict = dict;	
    }
    
    // adding a feature
    /**
     * Adds the feature.
     *
     * @param f the f
     */
    public void addFeature(Feature f) {
	f.strId2IdxAdd(fmap);
	features.add(f);
    }
    
    /**
     * Num features.
     *
     * @return the int
     */
    public int numFeatures() {
	if (features == null) {
	    return 0;
	} else {
	    return features.size();
	}
    }
    
    /**
     * Read features.
     *
     * @param fin the fin
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void readFeatures(BufferedReader fin) throws IOException {
	if (features != null) {
	    features.clear();
	} else {
	    features = new ArrayList();
	}
	
	if (fmap != null) {
	    fmap.clear(); 
	} else {
	    fmap = new HashMap();
	}
	
	if (eFeatures != null) {
	    eFeatures.clear();
	} else {
	    eFeatures = new ArrayList();
	}
	
	if (sFeatures != null) {
	    sFeatures.clear();
	} else {
	    sFeatures = new ArrayList();
	}
	
	String line;
	
	// get the number of features
	if ((line = fin.readLine()) == null) {
	    System.out.println("Unknown number of features");
	    return;
	}
	int numFeatures = Integer.parseInt(line);
        System.out.println("Number of features: " + numFeatures);
	if (numFeatures <= 0) {
	    System.out.println("Invalid number of features");
	    return;
	}
	
	System.out.println("Reading features ...");
	
	// main loop for reading features
	for (int i = 0; i < numFeatures; i++) {
	    line = fin.readLine();
	    if (line == null) {
		// invalid feature line, ignore it
		continue;
	    }
	    
	    StringTokenizer strTok = new StringTokenizer(line, " ");
	    if (strTok.countTokens() != 3) {
		// invalid feature line, ignore it
		continue;
	    }
	    
	    // create a new feature by parsing the line
	    Feature f = new Feature(line, maps.cpStr2Int, maps.lbStr2Int);
	    
	    Integer fidx = (Integer)fmap.get(f.strId);
	    if (fidx == null) {
		// insert the feature into the feature map
        //        System.out.println("\tinsert into the feature map");
		fmap.put(f.strId, new Integer(f.idx));
		features.add(f);
		
		if (f.ftype == Feature.EDGE_FEATURE1) {
		    eFeatures.add(f);
		}
	    }
	
            else {
            	features.add(f);
                  //System.out.println(line + "-----------> [" + f.strId + "]");            
            }      
            
	}
	
	System.out.println("Reading " + Integer.toString(features.size()) + " features completed!");
	
	// read the line ###...
	line = fin.readLine();
    }
    
    // start to scan features at a particular position in a data sequence
    /**
     * Start scan features at.
     *
     * @param seq the seq
     * @param pos the pos
     */
    public void startScanFeaturesAt(List seq, int pos) {
	startScanSFeaturesAt(seq, pos);
	startScanEFeatures();
    }
    
    /**
     * Checks for next feature.
     *
     * @return true, if successful
     */
    public boolean hasNextFeature() {
	return (hasNextSFeature() || hasNextEFeature());
    }
    
    /**
     * Next feature.
     *
     * @return the feature
     */
    public Feature nextFeature() {
	Feature f = null;
    
	if (hasNextSFeature()) {
	    f = nextSFeature();
	} else if (hasNextEFeature()) {
	    f = nextEFeature();
	} else {
	    // do nothing
	}
	
	return f;
    }
    
    // start to scan state features
    /** The s features. */
    List sFeatures = null;
    
    /** The s feature idx. */
    int sFeatureIdx = 0;
    
    /**
     * Start scan s features at.
     *
     * @param seq the seq
     * @param pos the pos
     */
    void startScanSFeaturesAt(List seq, int pos) {	
	sFeatures.clear();
	sFeatureIdx = 0;
	
	Observation obsr = (Observation)seq.get(pos);
	    
	// scan over all context predicates
	for (int i = 0; i < obsr.cps.length; i++) {
	    Element elem = (Element)dict.dict.get(new Integer(obsr.cps[i]));
	    if (elem == null) {
		continue;
	    }
	    
	    if (!(elem.isScanned)) {
		// scan all labels for state feature
		Iterator it = elem.lbCntFidxes.keySet().iterator();
		while (it.hasNext()) {
		    Integer label = (Integer)it.next();
		    CountFeatureIdx cntFidx = (CountFeatureIdx)elem.lbCntFidxes.get(label);

		    if (cntFidx.fidx >= 0) {
			Feature sF = new Feature();
			sF.sFeature1Init(label.intValue(), obsr.cps[i]);
			sF.idx = cntFidx.fidx;
			
			elem.cpFeatures.add(sF);
		    }	    
		}		
		
		elem.isScanned = true;
	    }
	    
	    for (int j = 0; j < elem.cpFeatures.size(); j++) {
		sFeatures.add(elem.cpFeatures.get(j));
	    }
	}		
    }    
    
    /**
     * Checks for next s feature.
     *
     * @return true, if successful
     */
    boolean hasNextSFeature() {
	return (sFeatureIdx < sFeatures.size());
    }
    
    /**
     * Next s feature.
     *
     * @return the feature
     */
    Feature nextSFeature() {
	Feature sF = (Feature)sFeatures.get(sFeatureIdx);
	sFeatureIdx++;
	return sF;
    }
    
    // start to scan edge features
    /** The e features. */
    List eFeatures = null;
    
    /** The e feature idx. */
    int eFeatureIdx = 0;
    
    /**
     * Start scan e features.
     */
    void startScanEFeatures() {
	eFeatureIdx = 0;
    }
    
    /**
     * Checks for next e feature.
     *
     * @return true, if successful
     */
    boolean hasNextEFeature() {
	return (eFeatureIdx < eFeatures.size());
    }
    
    /**
     * Next e feature.
     *
     * @return the feature
     */
    Feature nextEFeature() {
	Feature eF = (Feature)eFeatures.get(eFeatureIdx);
	eFeatureIdx++;
	return eF;
    }
    
} // end of class FeatureGen





© 2015 - 2024 Weber Informatics LLC | Privacy Policy