// Source listing: edu.stanford.nlp.maxent.Features (stanford-parser artifact; Maven / Gradle / Ivy).
// [Web page navigation residue: "Show all versions of stanford-parser", "Show documentation".]
/*
* Title: StanfordMaxEnt
* Description: A Maximum Entropy Toolkit
* Copyright: Copyright (c) Kristina Toutanova
* Company: Stanford University
*/
package edu.stanford.nlp.maxent;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
/**
 * An ordered, growable collection of {@link Feature} objects backed by an
 * {@link ArrayList}. Besides the usual add/get/size operations it can be
 * populated from a feature data file (see
 * {@link #Features(String, Experiments)}).
 *
 * @author Kristina Toutanova
 * @version 1.0
 */
public class Features {

  // todo [cdm 2018]: Probably this class can just be removed! Use ArrayList

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(Features.class);

  // NOTE(review): in the copy of this file under review the three marker
  // literals below had been reduced to "" / " " (angle-bracketed text appears
  // to have been stripped), which made the header checks vacuous. They are
  // restored here to the markers implied by the parsing code (the size is read
  // from between '>' and '<' and reported as "fSize") -- confirm against a
  // real data file.
  /** Line that marks the start of the feature section in a data file. */
  private static final String FEATURES_TAG = "<features>";
  /** Opening marker of the line that holds the number of features. */
  private static final String SIZE_OPEN_TAG = "<fSize>";
  /** Closing marker of the line that holds the number of features. */
  private static final String SIZE_CLOSE_TAG = "</fSize>";

  /** Message used for every structural error found while reading a data file. */
  private static final String BAD_FORMAT_MESSAGE = "Incorrect data file format!";

  /** The features, in insertion order. */
  private final ArrayList<Feature> f = new ArrayList<>();

  /** Creates an empty collection. */
  public Features() {
  }

  /**
   * Reads in the features from a file, having already read the experiments.
   *
   * <p>Expected layout: any number of lines are skipped until a line equal to
   * {@code <features>}; the next line must have the form
   * {@code <fSize>N</fSize>}; it is followed by {@code N} lines, one per
   * feature, each holding zero or more whitespace-separated triples
   * {@code x y value}. Each {@code (x, y)} pair is looked up in the index
   * obtained from {@code domain.createIndex()} and paired with {@code value}.
   *
   * <p>Errors (I/O problems, a missing or malformed header, an incomplete
   * triple, an unparsable number, a file that ends early) are not propagated:
   * they are reported through the class logger and reading stops, leaving in
   * this object whatever features were added before the failure.
   *
   * @param filename path of the data file to read
   * @param domain   the experiments the features are defined over
   */
  public Features(String filename, Experiments domain) {
    Index<IntPair> instanceIndex = domain.createIndex();
    try (BufferedReader in = new BufferedReader(new FileReader(filename))) {
      // Skip everything up to and including the section marker. readLine()
      // returns null at end of file, so test for that before comparing.
      String s = in.readLine();
      while (s != null && !s.equals(FEATURES_TAG)) {
        s = in.readLine();
      }
      if (s == null) {
        throw formatError();
      }

      // The line after the marker carries the feature count.
      s = in.readLine();
      if (s == null || !s.startsWith(SIZE_OPEN_TAG) || !s.endsWith(SIZE_CLOSE_TAG)) {
        throw formatError();
      }
      int index1 = s.indexOf(">");
      int index2 = s.lastIndexOf("<");
      String fSt = s.substring(index1 + 1, index2);
      System.out.println(fSt);
      int number = Integer.parseInt(fSt);
      System.out.println("fSize is " + number);

      for (int i = 0; i < number; i++) {
        String line = in.readLine();
        if (line == null) {
          // Fewer feature lines than the header announced.
          throw formatError();
        }
        this.add(parseFeature(line, domain, instanceIndex));
      }
    } catch (Exception e) {
      // Deliberately best-effort: report the problem and keep what was read.
      log.warn(e);
    }
  }

  /**
   * Appends a feature to the end of the collection.
   *
   * @param m the feature to add
   */
  public void add(Feature m) {
    f.add(m);
  }

  /**
   * Removes the most recently added feature.
   *
   * @throws IndexOutOfBoundsException if the collection is empty
   */
  public void removeLast() {
    f.remove(f.size() - 1);
  }

  /**
   * Returns the feature at the given position.
   *
   * @param index zero-based position
   * @return the feature stored at {@code index}
   * @throws IndexOutOfBoundsException if {@code index} is out of range
   */
  public Feature get(int index) {
    return f.get(index);
  }

  /**
   * Returns the number of features held.
   *
   * @return the current size
   */
  public int size() {
    return f.size();
  }

  /**
   * Returns the value of the static field {@code Feature.domain}.
   *
   * @return {@code Feature.domain}
   * @throws IndexOutOfBoundsException if the collection is empty
   */
  public Experiments domain() {
    // The element itself is not used; the call is kept because it makes this
    // method fail on an empty collection, which callers may rely on.
    get(0);
    return Feature.domain;
  }

  /** Does nothing. */
  public void clean() {
  }

  /** Calls {@code print()} on every feature, in order. */
  public void print() {
    for (int i = 0; i < size(); i++) {
      get(i).print();
    }
  }

  /**
   * Builds one feature from one data line made of whitespace-separated
   * {@code x y value} triples. A blank line yields a feature with no entries.
   *
   * @param line          the text of the feature line (not {@code null})
   * @param domain        passed through to the {@link Feature} constructor
   * @param instanceIndex index in which each {@code (x, y)} pair is looked up
   * @return the feature described by {@code line}
   * @throws IllegalArgumentException if the tokens do not form whole triples
   * @throws NumberFormatException    if a token is not a valid number
   */
  private static Feature parseFeature(String line, Experiments domain, Index<IntPair> instanceIndex) {
    String trimmed = line.trim();
    // "".split(...) would produce one empty token, so handle blank lines explicitly.
    String[] tokens = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
    if (tokens.length % 3 != 0) {
      throw formatError();
    }

    // Size the arrays from the line itself instead of a fixed scratch buffer.
    int count = tokens.length / 3;
    int[] indValues = new int[count];
    double[] values = new double[count];
    for (int k = 0; k < count; k++) {
      int x = Integer.parseInt(tokens[3 * k]);
      int y = Integer.parseInt(tokens[3 * k + 1]);
      indValues[k] = instanceIndex.indexOf(new IntPair(x, y));
      values[k] = Double.parseDouble(tokens[3 * k + 2]);
    }
    return new Feature(domain, indValues, values, instanceIndex);
  }

  /**
   * Creates the exception used for structural errors in a data file. It is
   * built at the point of failure so that its stack trace points there.
   *
   * @return a new exception carrying the format error message
   */
  private static IllegalArgumentException formatError() {
    return new IllegalArgumentException(BAD_FORMAT_MESSAGE);
  }
}