All downloads are free. The search and download functionality uses the official Maven repository.

cc.mallet.fst.semi_supervised.FSTConstraintUtil Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
/* Copyright (C) 2011 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.fst.semi_supervised;

import java.io.BufferedReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;

import cc.mallet.types.InstanceList;

/**
 * Expectation constraint utilities for fst package.
 *
 * <p>Constraint files are plain text with one feature per line. Fields are
 * separated by whitespace; the first field is the feature name and each
 * following field constrains one label:
 * <pre>
 *   featureName label1:prob label2:lower,upper ...
 * </pre>
 *
 * @author Gregory Druck
 */
public class FSTConstraintUtil {

  /**
   * Reads GE constraints from <code>fileReader</code>.
   *
   * <p>Each non-blank line names a feature (which must already be present in
   * the data alphabet of <code>data</code>) followed by any number of
   * <code>label:prob</code> or <code>label:lower,upper</code> fields. Blank
   * lines and leading/trailing whitespace are ignored. If the same feature
   * appears on several lines, the last line wins.
   *
   * <p>As a side effect, every entry of the target alphabet is printed to
   * <code>System.err</code> before parsing starts.
   *
   * <p>Failure policy: any exception raised while reading or parsing
   * (I/O error, unknown feature, unknown label, malformed field, unparsable
   * number) is printed with its stack trace and the JVM is terminated via
   * <code>System.exit(1)</code>. The supplied reader is wrapped in a
   * <code>BufferedReader</code> but is not closed here.
   *
   * @param fileReader source of the constraint text
   * @param data instance list supplying the data (feature) and target (label) alphabets
   * @return map from feature index (<code>Integer</code>) to a
   *         <code>double[numLabels][2]</code> array holding, per label index,
   *         the lower bound at position 0 and the upper bound at position 1;
   *         labels not mentioned on the feature's line keep
   *         <code>Double.NEGATIVE_INFINITY</code> in both positions
   */
  public static HashMap loadGEConstraints(Reader fileReader, InstanceList data) {
    // Typed locally; the raw return type is kept so existing callers compile unchanged.
    HashMap<Integer, double[][]> constraints = new HashMap<Integer, double[][]>();

    // Lookups below never add entries, so the label count is fixed for the whole parse.
    int numLabels = data.getTargetAlphabet().size();

    // Diagnostic dump of the label set.
    for (int li = 0; li < numLabels; li++) {
      System.err.println(data.getTargetAlphabet().lookupObject(li));
    }

    try {
      BufferedReader reader = new BufferedReader(fileReader);
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Trimming avoids an empty first token, which would otherwise be
        // looked up as a feature named "" and abort the load.
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
          continue;
        }

        String[] split = trimmed.split("\\s+");

        // assume the feature name has no spaces
        String featureName = split[0];
        int featureIndex = data.getDataAlphabet().lookupIndex(featureName, false);
        if (featureIndex == -1) {
          throw new RuntimeException("Feature " + featureName + " not found in the alphabet!");
        }

        // NEGATIVE_INFINITY marks "no constraint given for this label".
        double[][] probs = new double[numLabels][2];
        for (int i = 0; i < probs.length; i++) {
          Arrays.fill(probs[i], Double.NEGATIVE_INFINITY);
        }

        for (int index = 1; index < split.length; index++) {
          parseLabelConstraint(split[index], data, probs);
        }
        constraints.put(featureIndex, probs);
      }
    }
    catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }
    return constraints;
  }

  /**
   * Parses one <code>label:prob</code> or <code>label:lower,upper</code> field
   * and stores the bounds in the row of <code>probs</code> for that label.
   *
   * @throws RuntimeException if the field is malformed or the label is not in the target alphabet
   * @throws NumberFormatException if a bound is not a parsable double
   */
  private static void parseLabelConstraint(String token, InstanceList data, double[][] probs) {
    String[] labelSplit = token.split(":");
    if (labelSplit.length < 2) {
      throw new RuntimeException("Malformed constraint \"" + token
          + "\": expected label:prob or label:lower,upper");
    }

    // Checked explicitly rather than with assert: assertions are off by default,
    // and an unknown label would otherwise index probs[-1].
    int li = data.getTargetAlphabet().lookupIndex(labelSplit[0], false);
    if (li == -1) {
      throw new RuntimeException("Label " + labelSplit[0] + " not found in the alphabet!");
    }

    String value = labelSplit[1];
    if (value.contains(",")) {
      String[] rangeSplit = value.split(",");
      if (rangeSplit.length < 2) {
        throw new RuntimeException("Malformed range \"" + value + "\" in constraint \""
            + token + "\": expected lower,upper");
      }
      probs[li][0] = Double.parseDouble(rangeSplit[0]);
      probs[li][1] = Double.parseDouble(rangeSplit[1]);
    }
    else {
      // A single value is a point constraint: lower == upper.
      double prob = Double.parseDouble(value);
      probs[li][0] = prob;
      probs[li][1] = prob;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy