All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cc.mallet.cluster.tui.Clusterings2Info Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

There is a newer version: 2.0.12
Show newest version
package cc.mallet.cluster.tui;


import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.util.logging.Logger;

import cc.mallet.cluster.Clustering;
import cc.mallet.cluster.Clusterings;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;

//In progress
public class Clusterings2Info {

	private static Logger logger =
			MalletLogger.getLogger(Clusterings2Info.class.getName());

	public static void main (String[] args) {
		CommandOption
									.setSummary(Clusterings2Info.class,
															"A tool to print statistics about a Clusterings.");
		CommandOption.process(Clusterings2Info.class, args);

		Clusterings clusterings = null;
		try {
			ObjectInputStream iis =
					new ObjectInputStream(new FileInputStream(inputFile.value));
			clusterings = (Clusterings) iis.readObject();
		} catch (Exception e) {
			System.err.println("Exception reading clusterings from "
													+ inputFile.value + " " + e);
			e.printStackTrace();
		}

		if (printOption.value) {
			for (int i = 0; i < clusterings.size(); i++) {
				Clustering c = clusterings.get(i);
				for (int j = 0; j < c.getNumClusters(); j++) {
					InstanceList cluster = c.getCluster(j);
					for (int k = 0; k < cluster.size(); k++) {
						System.out.println("clustering " + i + " cluster " + j + " element " + k + " " + cluster.get(k).getData());
					}
					System.out.println();
				}
			}
		}
		logger.info("number clusterings=" + clusterings.size());

		int totalInstances = 0;
		int totalClusters = 0;

		for (int i = 0; i < clusterings.size(); i++) {
			Clustering c = clusterings.get(i);
			totalClusters += c.getNumClusters();
			totalInstances += c.getNumInstances();
		}
		logger.info("total instances=" + totalInstances);
		logger.info("total clusters=" + totalClusters);
		logger.info("instances per clustering=" + (double) totalInstances
								/ clusterings.size());
		logger.info("instances per cluster=" + (double) totalInstances
								/ totalClusters);
		logger.info("clusters per clustering=" + (double) totalClusters
								/ clusterings.size());
	}

	static CommandOption.String inputFile =
			new CommandOption.String(
																Clusterings2Info.class,
																"input",
																"FILENAME",
																true,
																"text.vectors",
																"The filename from which to read the list of instances.",
																null);

	static CommandOption.Boolean printOption = 
			new CommandOption.Boolean(Clusterings2Info.class,
			                          "print",
			                          "BOOLEAN",			                          	
			                          false,
			                          false,
			                          "If true, print all clusters",
			                          null);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy