cc.mallet.fst.semi_supervised.FSTConstraintUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
The newest version!
/* Copyright (C) 2011 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.fst.semi_supervised;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;
import cc.mallet.types.InstanceList;
/**
 * Expectation constraint utilities for fst package.
 *
 * @author Gregory Druck
 */
public class FSTConstraintUtil {

  /**
   * Reads GE constraints from a whitespace-delimited text source.
   *
   * <p>Each non-blank line has the form
   * <pre>
   *   featureName label:prob label:lower,upper ...
   * </pre>
   * The feature name must not contain whitespace and must already exist in
   * the data alphabet of {@code data}; every label must already exist in the
   * target alphabet of {@code data}. A single value {@code prob} is stored as
   * both the lower and the upper bound. Blank lines are skipped and leading or
   * trailing whitespace on a line is ignored.
   *
   * <p>As a side effect, every entry of the target alphabet is printed to
   * {@code System.err} before parsing starts.
   *
   * <p>Failure policy: any {@link Exception} raised while reading or parsing
   * (I/O error, unknown feature or label, malformed token, unparsable number)
   * is printed via {@code printStackTrace()} and the JVM is terminated with
   * {@code System.exit(1)}.
   *
   * <p>The supplied reader is wrapped in a {@link BufferedReader} and consumed
   * to end-of-stream, but it is not closed here; the caller remains
   * responsible for closing it.
   *
   * @param fileReader source of the constraint lines
   * @param data instance list whose data and target alphabets are used to
   *     resolve feature names and label names to indices
   * @return map from feature index to a {@code double[numLabels][2]} array in
   *     which {@code [label][0]} is the lower bound and {@code [label][1]} the
   *     upper bound; labels not mentioned on the feature's line keep
   *     {@link Double#NEGATIVE_INFINITY} in both slots. If a feature appears
   *     on several lines, the last line wins.
   */
  public static HashMap<Integer, double[][]> loadGEConstraints(Reader fileReader, InstanceList data) {
    HashMap<Integer, double[][]> constraints = new HashMap<Integer, double[][]>();

    // The label set does not change while parsing, so look its size up once.
    int numLabels = data.getTargetAlphabet().size();
    for (int li = 0; li < numLabels; li++) {
      System.err.println(data.getTargetAlphabet().lookupObject(li));
    }

    try {
      BufferedReader reader = new BufferedReader(fileReader);
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Trimming avoids an empty first token for indented lines; blank
        // lines (e.g. a trailing newline at end of file) carry no constraint.
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
          continue;
        }

        String[] split = trimmed.split("\\s+");
        // assume the feature name has no spaces
        String featureName = split[0];
        int featureIndex = data.getDataAlphabet().lookupIndex(featureName, false);
        if (featureIndex == -1) {
          throw new RuntimeException("Feature " + featureName + " not found in the alphabet!");
        }

        // NEGATIVE_INFINITY marks "no constraint given for this label".
        double[][] probs = new double[numLabels][2];
        for (int i = 0; i < probs.length; i++) {
          Arrays.fill(probs[i], Double.NEGATIVE_INFINITY);
        }

        for (int index = 1; index < split.length; index++) {
          parseLabelConstraint(split[index], featureName, data, probs);
        }
        constraints.put(featureIndex, probs);
      }
    }
    catch (Exception e) {
      // NOTE(review): terminating the JVM from library code is drastic, but it
      // is the established contract of this method, so it is preserved.
      e.printStackTrace();
      System.exit(1);
    }
    return constraints;
  }

  /**
   * Parses one {@code label:prob} or {@code label:lower,upper} token and
   * stores the bounds in the row of {@code probs} that belongs to the label.
   *
   * @throws IllegalArgumentException if the token has no value part, the
   *     label is not in the target alphabet, or a range lacks its upper bound
   * @throws NumberFormatException if a bound is not a parsable double
   */
  private static void parseLabelConstraint(String token, String featureName,
      InstanceList data, double[][] probs) {
    String[] labelSplit = token.split(":");
    if (labelSplit.length < 2) {
      throw new IllegalArgumentException("Malformed constraint \"" + token
          + "\" for feature " + featureName + ": expected label:prob or label:lower,upper");
    }

    // An explicit check instead of an assert: assertions are off by default,
    // and an unknown label would otherwise surface as an index error on -1.
    int li = data.getTargetAlphabet().lookupIndex(labelSplit[0], false);
    if (li == -1) {
      throw new IllegalArgumentException("Label " + labelSplit[0]
          + " (feature " + featureName + ") not found in the target alphabet!");
    }

    String value = labelSplit[1];
    if (value.contains(",")) {
      String[] rangeSplit = value.split(",");
      if (rangeSplit.length < 2) {
        throw new IllegalArgumentException("Malformed range \"" + value
            + "\" for label " + labelSplit[0] + " (feature " + featureName
            + "): expected lower,upper");
      }
      probs[li][0] = Double.parseDouble(rangeSplit[0]);
      probs[li][1] = Double.parseDouble(rangeSplit[1]);
    }
    else {
      // A single target value is represented as a degenerate range.
      double prob = Double.parseDouble(value);
      probs[li][0] = prob;
      probs[li][1] = prob;
    }
  }
}