All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.cluster.Record Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.cluster;

import gnu.trove.TIntObjectHashMap;

import java.io.Serializable;

import cc.mallet.types.Alphabet;
import cc.mallet.types.AugmentableFeatureVector;
import cc.mallet.types.FeatureVector;

/**
 * A record made of named fields, where each field holds a set of values.
 *
 * <p>Field names are mapped to integer indices through a field {@link Alphabet},
 * and values are mapped to integer indices through a value {@link Alphabet}.
 * Each field index is associated with a {@link FeatureVector} over the value
 * alphabet that lists the values present for that field.
 */
public class Record implements Serializable {

	private static final long serialVersionUID = 1L;

	/** Alphabet used to translate field names to field indices. */
	Alphabet fieldAlph;

	/** Alphabet used to translate values to value indices. */
	Alphabet valueAlph;

	/** Maps a field index to the {@link FeatureVector} holding that field's values. */
	TIntObjectHashMap field2values;

	/**
	 * Creates an empty record over the given alphabets.
	 *
	 * @param fieldAlph alphabet of field names
	 * @param valueAlph alphabet of values
	 */
	public Record (Alphabet fieldAlph, Alphabet valueAlph) {
		this.fieldAlph = fieldAlph;
		this.valueAlph = valueAlph;
		field2values = new TIntObjectHashMap();
	}

	/**
	 * Creates a record and fills it from a table of strings.
	 *
	 * <p>Each row describes one field: element 0 is the field name and every
	 * following element is one value of that field, added with weight 1.0.
	 * Names and values are looked up in (and, if the alphabets allow it, added
	 * to) the supplied alphabets.
	 *
	 * @param fieldAlph alphabet of field names
	 * @param valueAlph alphabet of values
	 * @param vals one row per field; every row must contain at least the field
	 *        name at index 0
	 */
	public Record (Alphabet fieldAlph, Alphabet valueAlph, String[][] vals) {
		this(fieldAlph, valueAlph);
		for (int i = 0; i < vals.length; i++) {
			AugmentableFeatureVector afv = new AugmentableFeatureVector(valueAlph, false);
			// Index 0 is the field name, so values start at index 1.
			for (int j = 1; j < vals[i].length; j++) {
				afv.add(valueAlph.lookupIndex(vals[i][j]), 1.0);
			}
			field2values.put(fieldAlph.lookupIndex(vals[i][0]), afv.toFeatureVector());
		}
	}

	/**
	 * Returns the values stored for the named field.
	 *
	 * <p>The lookup is read-only: an unknown field name is not added to the
	 * field alphabet.
	 *
	 * @param field field name
	 * @return the field's values, or {@code null} if the record has no such field
	 */
	public FeatureVector values (String field) {
		return values(fieldIndex(field));
	}

	/**
	 * Returns the values stored for the field with the given index.
	 *
	 * @param field field index in the field alphabet
	 * @return the field's values, or {@code null} if the record has no such field
	 */
	public FeatureVector values (int field) {
		return (FeatureVector) field2values.get(field);
	}

	/**
	 * Returns a single value index for the named field.
	 *
	 * <p>The lookup is read-only: an unknown field name is not added to the
	 * field alphabet.
	 *
	 * @param field field name
	 * @return the value index at location 0 of the field's feature vector, or
	 *         {@code -1} if the field is absent or has no values
	 */
	public int value (String field) {
		return value(fieldIndex(field));
	}

	/**
	 * Returns a single value index for the field with the given index.
	 *
	 * @param field field index in the field alphabet
	 * @return the value index at location 0 of the field's feature vector, or
	 *         {@code -1} if the field is absent or has no values
	 */
	public int value (int field) {
		FeatureVector fv = values(field);
		// A field may exist with an empty vector (a row that held only the
		// field name); treat that the same as a missing field instead of
		// indexing past the end of the vector.
		if (fv == null || fv.numLocations() == 0) {
			return -1;
		}
		return fv.indexAtLocation(0);
	}

	/**
	 * Returns the indices of all fields present in this record.
	 *
	 * @return the keys of the field-to-values map
	 */
	public int[] fields () { return field2values.keys(); }

	/** Returns the alphabet of field names. */
	public Alphabet fieldAlphabet () { return this.fieldAlph; }

	/** Returns the alphabet of values. */
	public Alphabet valueAlphabet () { return this.valueAlph; }

	/** Returns the single-line form produced by {@code toString(true)}. */
	@Override
	public String toString () { return toString(true); }

	/**
	 * Renders the record as text.
	 *
	 * <p>Each field is written as {@code name=} followed by each of its values,
	 * every value followed by a comma.
	 *
	 * @param oneLine if {@code false}, a newline is written after each field;
	 *        if {@code true}, fields are written back to back
	 * @return the textual form of this record
	 */
	public String toString (boolean oneLine) {
		StringBuilder b = new StringBuilder();
		int[] keys = field2values.keys();
		for (int i = 0; i < keys.length; i++) {
			b.append(fieldAlph.lookupObject(keys[i])).append("=");
			FeatureVector v = (FeatureVector) field2values.get(keys[i]);
			for (int j = 0; j < v.numLocations(); j++) {
				b.append(valueAlph.lookupObject(v.indexAtLocation(j))).append(",");
			}
			if (!oneLine) {
				b.append("\n");
			}
		}
		return b.toString();
	}

	/**
	 * Resolves a field name to its index without modifying the field alphabet.
	 *
	 * @param field field name
	 * @return the field index, or {@code -1} if the name is not in the alphabet
	 */
	private int fieldIndex (String field) {
		return fieldAlph.lookupIndex(field, false);
	}

	/** Small demo: builds a three-field record and prints it. */
	public static void main (String[] args) {
		Record r =
				new Record(new Alphabet(), new Alphabet(),
										new String[][] { { "field1", "f1v1", "f1v2" },
																		{ "field2", "f2v1" },
																		{ "field3", "f3v1", "f3v2", "f3v3" } });
		System.out.println(r);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy