jflexcrf.FeatureGen Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of heideltime Show documentation
Show all versions of heideltime Show documentation
HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.
/*
Copyright (C) 2010 by
*
* Cam-Tu Nguyen [email protected] [email protected]
* Xuan-Hieu Phan [email protected]
* College of Technology, Vietnamese University, Hanoi
*
* Graduate School of Information Sciences
* Tohoku University
*
* JVnTextPro-v.2.0 is a free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JVnTextPro-v.2.0); if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package jflexcrf;
import java.io.*;
import java.util.*;
// TODO: Auto-generated Javadoc
/**
* The Class FeatureGen.
*/
public class FeatureGen {
/** The features. */
List features = null; // list of features
/** The fmap. */
Map fmap = null; // feature map
/** The maps. */
Maps maps = null; // context predicate and label maps
/** The dict. */
Dictionary dict = null; // dictionary
/**
* Instantiates a new feature gen.
*
* @param maps the maps
* @param dict the dict
*/
public FeatureGen(Maps maps, Dictionary dict) {
this.maps = maps;
this.dict = dict;
}
// adding a feature
/**
* Adds the feature.
*
* @param f the f
*/
public void addFeature(Feature f) {
f.strId2IdxAdd(fmap);
features.add(f);
}
/**
* Num features.
*
* @return the int
*/
public int numFeatures() {
if (features == null) {
return 0;
} else {
return features.size();
}
}
/**
* Read features.
*
* @param fin the fin
* @throws IOException Signals that an I/O exception has occurred.
*/
public void readFeatures(BufferedReader fin) throws IOException {
if (features != null) {
features.clear();
} else {
features = new ArrayList();
}
if (fmap != null) {
fmap.clear();
} else {
fmap = new HashMap();
}
if (eFeatures != null) {
eFeatures.clear();
} else {
eFeatures = new ArrayList();
}
if (sFeatures != null) {
sFeatures.clear();
} else {
sFeatures = new ArrayList();
}
String line;
// get the number of features
if ((line = fin.readLine()) == null) {
System.out.println("Unknown number of features");
return;
}
int numFeatures = Integer.parseInt(line);
System.out.println("Number of features: " + numFeatures);
if (numFeatures <= 0) {
System.out.println("Invalid number of features");
return;
}
System.out.println("Reading features ...");
// main loop for reading features
for (int i = 0; i < numFeatures; i++) {
line = fin.readLine();
if (line == null) {
// invalid feature line, ignore it
continue;
}
StringTokenizer strTok = new StringTokenizer(line, " ");
if (strTok.countTokens() != 3) {
// invalid feature line, ignore it
continue;
}
// create a new feature by parsing the line
Feature f = new Feature(line, maps.cpStr2Int, maps.lbStr2Int);
Integer fidx = (Integer)fmap.get(f.strId);
if (fidx == null) {
// insert the feature into the feature map
// System.out.println("\tinsert into the feature map");
fmap.put(f.strId, new Integer(f.idx));
features.add(f);
if (f.ftype == Feature.EDGE_FEATURE1) {
eFeatures.add(f);
}
}
else {
features.add(f);
//System.out.println(line + "-----------> [" + f.strId + "]");
}
}
System.out.println("Reading " + Integer.toString(features.size()) + " features completed!");
// read the line ###...
line = fin.readLine();
}
// start to scan features at a particular position in a data sequence
/**
* Start scan features at.
*
* @param seq the seq
* @param pos the pos
*/
public void startScanFeaturesAt(List seq, int pos) {
startScanSFeaturesAt(seq, pos);
startScanEFeatures();
}
/**
* Checks for next feature.
*
* @return true, if successful
*/
public boolean hasNextFeature() {
return (hasNextSFeature() || hasNextEFeature());
}
/**
* Next feature.
*
* @return the feature
*/
public Feature nextFeature() {
Feature f = null;
if (hasNextSFeature()) {
f = nextSFeature();
} else if (hasNextEFeature()) {
f = nextEFeature();
} else {
// do nothing
}
return f;
}
// start to scan state features
/** The s features. */
List sFeatures = null;
/** The s feature idx. */
int sFeatureIdx = 0;
/**
* Start scan s features at.
*
* @param seq the seq
* @param pos the pos
*/
void startScanSFeaturesAt(List seq, int pos) {
sFeatures.clear();
sFeatureIdx = 0;
Observation obsr = (Observation)seq.get(pos);
// scan over all context predicates
for (int i = 0; i < obsr.cps.length; i++) {
Element elem = (Element)dict.dict.get(new Integer(obsr.cps[i]));
if (elem == null) {
continue;
}
if (!(elem.isScanned)) {
// scan all labels for state feature
Iterator it = elem.lbCntFidxes.keySet().iterator();
while (it.hasNext()) {
Integer label = (Integer)it.next();
CountFeatureIdx cntFidx = (CountFeatureIdx)elem.lbCntFidxes.get(label);
if (cntFidx.fidx >= 0) {
Feature sF = new Feature();
sF.sFeature1Init(label.intValue(), obsr.cps[i]);
sF.idx = cntFidx.fidx;
elem.cpFeatures.add(sF);
}
}
elem.isScanned = true;
}
for (int j = 0; j < elem.cpFeatures.size(); j++) {
sFeatures.add(elem.cpFeatures.get(j));
}
}
}
/**
* Checks for next s feature.
*
* @return true, if successful
*/
boolean hasNextSFeature() {
return (sFeatureIdx < sFeatures.size());
}
/**
* Next s feature.
*
* @return the feature
*/
Feature nextSFeature() {
Feature sF = (Feature)sFeatures.get(sFeatureIdx);
sFeatureIdx++;
return sF;
}
// start to scan edge features
/** The e features. */
List eFeatures = null;
/** The e feature idx. */
int eFeatureIdx = 0;
/**
* Start scan e features.
*/
void startScanEFeatures() {
eFeatureIdx = 0;
}
/**
* Checks for next e feature.
*
* @return true, if successful
*/
boolean hasNextEFeature() {
return (eFeatureIdx < eFeatures.size());
}
/**
* Next e feature.
*
* @return the feature
*/
Feature nextEFeature() {
Feature eF = (Feature)eFeatures.get(eFeatureIdx);
eFeatureIdx++;
return eF;
}
} // end of class FeatureGen
© 2015 - 2024 Weber Informatics LLC | Privacy Policy