weka.attributeSelection.LatentSemanticAnalysis Maven / Gradle / Ivy

Go to download
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    LatentSemanticAnalysis.java
 *    Copyright (C) 2008 Amri Napolitano
 *
 */

package weka.attributeSelection;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Check;
import weka.core.CheckOptionHandler;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.matrix.Matrix;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.matrix.SingularValueDecomposition;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Enumeration;
import java.util.Vector;

/**
 
 * Performs latent semantic analysis and transformation of the data. 
 * Use in conjunction with a Ranker search. A low-rank approximation 
 * of the full data is found by specifying the number of singular values 
 * to use. The dataset may be transformed to give the relation of either 
 * the attributes or the instances (default) to the concept space created 
 * by the transformation.
 * 
 
 *
 
 * Valid options are: 

 * 
 * 
 -N
 *  Normalize input data.
 * 
 *  -R
 *  Rank approximation used in LSA. May be actual number of 
 *  LSA attributes to include (if greater than 1) or a proportion 
 *  of total singular values to account for (if between 0 and 1). 
 *  A value less than or equal to zero means use all latent variables.
 *  (default = 0.95)
 * 
 *  -A
 *  Maximum number of attributes to include in 
 *  transformed attribute names. (-1 = include all)
 * 
 
 *
 * @author Amri Napolitano
 * @version $Revision: 8108 $
 */

public class LatentSemanticAnalysis 
extends UnsupervisedAttributeEvaluator 
implements AttributeTransformer, OptionHandler {
  
  /** For serialization */
  static final long serialVersionUID = -8712112988018106198L;
  
  /** The data to transform analyse/transform */
  private Instances m_trainInstances;
  
  /** 
   * Keep a copy for the class attribute (if set) and for 
   * checking for header compatibility 
   */
  private Instances m_trainHeader;
  
  /** The header for the transformed data format */
  private Instances m_transformedFormat;
  
  /** Data has a class set */
  private boolean m_hasClass;
  
  /** Class index */
  private int m_classIndex;
  
  /** Number of attributes */
  private int m_numAttributes;
  
  /** Number of instances */
  private int m_numInstances;
  
  /** Is transpose necessary because numAttributes < numInstances? */
  private boolean m_transpose = false;
  
  /** Will hold the left singular vectors */
  private Matrix m_u = null;
  
  /** Will hold the singular values */
  private Matrix m_s = null;
  
  /** Will hold the right singular values */
  private Matrix m_v = null;
  
  /** Will hold the matrix used to transform instances to the new feature space */
  private Matrix m_transformationMatrix = null;
  
  /** Filters for original data */
  private ReplaceMissingValues m_replaceMissingFilter;
  private Normalize m_normalizeFilter;
  private NominalToBinary m_nominalToBinaryFilter;
  private Remove m_attributeFilter;
  
  /** The number of attributes in the LSA transformed data */
  private int m_outputNumAttributes = -1;
  
  /** Normalize the input data? */
  private boolean m_normalize = false;
  
  /** The approximation rank to use (between 0 and 1 means coverage proportion) */
  private double m_rank = 0.95;
  
  /** The sum of the squares of the singular values */
  private double m_sumSquaredSingularValues = 0.0;
  
  /** The actual rank number to use for computation */
  private int m_actualRank = -1;
  
  /** Maximum number of attributes in the transformed attribute name */
  private int m_maxAttributesInName = 5;
  
  /**
   * Returns a string describing this attribute transformer
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Performs latent semantic analysis and transformation of the data. Use in " +
            "conjunction with a Ranker search. A low-rank approximation of the full data is " +
            "found by either specifying the number of singular values to use or specifying a " +
            "proportion of the singular values to cover.";
  }
  
  /**
   * Returns an enumeration describing the available options. 
   *
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    Vector options = new Vector(4);
    options.addElement(new Option("\tNormalize input data.", "N", 0, "-N"));
    
    options.addElement(new Option("\tRank approximation used in LSA. \n" +
                                   "\tMay be actual number of LSA attributes \n" +
                                   "\tto include (if greater than 1) or a \n" +
                                   "\tproportion of total singular values to \n" +
                                   "\taccount for (if between 0 and 1). \n" +
                                   "\tA value less than or equal to zero means \n" +
                                   "\tuse all latent variables.(default = 0.95)",
                                   "R",1,"-R"));
    
    options.addElement(new Option("\tMaximum number of attributes to include\n" +
                                   "\tin transformed attribute names.\n" +
                                   "\t(-1 = include all)"
                                   , "A", 1, "-A"));
    return  options.elements();
  }
  
  /**
   * Parses a given list of options. 

   *
   
   * Valid options are: 

   * 
   * 
 -N
   *  Normalize input data.
   * 
   *  -R
   *  Rank approximation used in LSA. May be actual number of 
   *  LSA attributes to include (if greater than 1) or a proportion 
   *  of total singular values to account for (if between 0 and 1). 
   *  A value less than or equal to zero means use all latent variables.
   *  (default = 0.95)
   * 
   *  -A
   *  Maximum number of attributes to include in 
   *  transformed attribute names. (-1 = include all)
   * 
   
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions (String[] options)
  throws Exception {
    resetOptions();
    String optionString;
    
    //set approximation rank
    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      double temp;
      temp = Double.valueOf(optionString).doubleValue();
      setRank(temp);
    }
    
    //set number of attributes to use in transformed names
    optionString = Utils.getOption('A', options);
    if (optionString.length() != 0) {
      setMaximumAttributeNames(Integer.parseInt(optionString));
    }
    
    //set normalize option
    setNormalize(Utils.getFlag('N', options));
  }
  
  /**
   * Reset to defaults
   */
  private void resetOptions() {
    m_rank = 0.95;
    m_normalize = true;
    m_maxAttributesInName = 5;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String normalizeTipText() {
    return "Normalize input data.";
  }
  
  /**
   * Set whether input data will be normalized.
   * @param newNormalize true if input data is to be normalized
   */
  public void setNormalize(boolean newNormalize) {
    m_normalize = newNormalize;
  }
  
  /**
   * Gets whether or not input data is to be normalized
   * @return true if input data is to be normalized
   */
  public boolean getNormalize() {
    return m_normalize;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String rankTipText() {
    return "Matrix rank to use for data reduction. Can be a" +
    " proportion to indicate desired coverage";
  }
  
  /**
   * Sets the desired matrix rank (or coverage proportion) for feature-space reduction
   * @param newRank the desired rank (or coverage) for feature-space reduction
   */
  public void setRank(double newRank) {
      m_rank = newRank;
  }
  
  /**
   * Gets the desired matrix rank (or coverage proportion) for feature-space reduction
   * @return the rank (or coverage) for feature-space reduction
   */
  public double getRank() {
    return m_rank;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String maximumAttributeNamesTipText() {
    return "The maximum number of attributes to include in transformed attribute names.";
  }
  
  /**
   * Sets maximum number of attributes to include in
   * transformed attribute names.
   * @param newMaxAttributes the maximum number of attributes
   */
  public void setMaximumAttributeNames(int newMaxAttributes) {
    m_maxAttributesInName = newMaxAttributes;
  }
  
  /**
   * Gets maximum number of attributes to include in
   * transformed attribute names.
   * @return the maximum number of attributes
   */
  public int getMaximumAttributeNames() {
    return m_maxAttributesInName;
  }
  
  /**
   * Gets the current settings of LatentSemanticAnalysis
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    
    String[] options = new String[5];
    int current = 0;
    
    if (getNormalize()) {
      options[current++] = "-N";
    }
    
    options[current++] = "-R";
    options[current++] = "" + getRank();
    
    options[current++] = "-A";
    options[current++] = "" + getMaximumAttributeNames();
    
    while (current < options.length) {
      options[current++] = "";
    }
    
    return  options;
  }
  
  /**
   * Returns the capabilities of this evaluator.
   *
   * @return            the capabilities of this evaluator
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();
    
    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);
    
    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);
    
    return result;
  }
  
  /**
   * Initializes the singular values/vectors and performs the analysis
   * @param data the instances to analyse/transform
   * @throws Exception if analysis fails
   */
  public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    
    buildAttributeConstructor(data);
  }
  
  /**
   * Initializes the singular values/vectors and performs the analysis
   * @param data the instances to analyse/transform
   * @throws Exception if analysis fails
   */
  private void buildAttributeConstructor (Instances data) throws Exception {
    // initialize attributes for performing analysis
    m_transpose = false;
    m_s = null;
    m_u = null;
    m_v = null;
    m_outputNumAttributes = -1;
    m_actualRank = -1;
    m_sumSquaredSingularValues = 0.0;
    
    m_trainInstances = new Instances(data);
    m_trainHeader = null;
    
    m_attributeFilter = null;
    m_nominalToBinaryFilter = null;
    
    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter);
    
    // vector to hold indices of attributes to delete (class attribute, 
    // attributes that are all missing, or attributes with one distinct value)
    Vector attributesToRemove = new Vector();
    
    // if data has a class attribute
    if (m_trainInstances.classIndex() >= 0) {
      
      m_hasClass = true;
      m_classIndex = m_trainInstances.classIndex();
      
      // set class attribute to be removed
      attributesToRemove.addElement(new Integer(m_classIndex));
    }
    // make copy of training data so the class values (if set) can be appended to final 
    // transformed instances and so that we can check header compatibility
    m_trainHeader = new Instances(m_trainInstances, 0);
    
    // normalize data if desired
    if (m_normalize) {
      m_normalizeFilter = new Normalize();
      m_normalizeFilter.setInputFormat(m_trainInstances);
      m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);
    }
    
    // convert any nominal attributes to binary numeric attributes
    m_nominalToBinaryFilter = new NominalToBinary();
    m_nominalToBinaryFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinaryFilter);
    
    // delete any attributes with only one distinct value or are all missing
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
      if (m_trainInstances.numDistinctValues(i) <= 1) {
        attributesToRemove.addElement(new Integer(i));
      }
    }
    
    // remove columns from the data if necessary
    if (attributesToRemove.size() > 0) {
      m_attributeFilter = new Remove();
      int [] todelete = new int[attributesToRemove.size()];
      for (int i = 0; i < attributesToRemove.size(); i++) {
        todelete[i] = ((Integer)(attributesToRemove.elementAt(i))).intValue();
      }
      m_attributeFilter.setAttributeIndicesArray(todelete);
      m_attributeFilter.setInvertSelection(false);
      m_attributeFilter.setInputFormat(m_trainInstances);
      m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }
    
    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_trainInstances);
    
    // record properties of final, ready-to-process data
    m_numInstances = m_trainInstances.numInstances();
    m_numAttributes = m_trainInstances.numAttributes();
    
    // create matrix of attribute values and compute singular value decomposition
    double [][] trainValues = new double[m_numAttributes][m_numInstances];
    for (int i = 0; i < m_numAttributes; i++) {
      trainValues[i] = m_trainInstances.attributeToDoubleArray(i);
    }
    Matrix trainMatrix = new Matrix(trainValues);
    // svd requires rows >= columns, so transpose data if necessary
    if (m_numAttributes < m_numInstances) {
      m_transpose = true;
      trainMatrix = trainMatrix.transpose();
    }
    SingularValueDecomposition trainSVD = trainMatrix.svd();
    m_u = trainSVD.getU(); // left singular vectors
    m_s = trainSVD.getS(); // singular values
    m_v = trainSVD.getV(); // right singular vectors
    
    // find actual rank to use
    int maxSingularValues = trainSVD.rank();
    for (int i = 0; i < m_s.getRowDimension(); i++) {
      m_sumSquaredSingularValues += m_s.get(i, i) * m_s.get(i, i);
    }
    if (maxSingularValues == 0) { // no nonzero singular values (shouldn't happen)
      // reset values from computation
      m_s = null;
      m_u = null;
      m_v = null;
      m_sumSquaredSingularValues = 0.0;
      
      throw new Exception("SVD computation produced no non-zero singular values.");
    }
    if (m_rank > maxSingularValues || m_rank <= 0) { // adjust rank if too high or too low
      m_actualRank = maxSingularValues;
    } else if (m_rank < 1.0) { // determine how many singular values to include for desired coverage
      double currentSumOfSquaredSingularValues = 0.0;
      for (int i = 0; i < m_s.getRowDimension() && m_actualRank == -1; i++) {
        currentSumOfSquaredSingularValues += m_s.get(i, i) * m_s.get(i, i);
        if (currentSumOfSquaredSingularValues / m_sumSquaredSingularValues >= m_rank) {
          m_actualRank = i + 1;
        }
      }
    } else {
      m_actualRank = (int) m_rank;
    }
    
    // lower matrix ranks, adjust for transposition (if necessary), and
    // compute matrix for transforming future instances
    if (m_transpose) {
      Matrix tempMatrix = m_u;
      m_u = m_v;
      m_v = tempMatrix;
    }
    m_u = m_u.getMatrix(0, m_u.getRowDimension() - 1, 0, m_actualRank - 1);
    m_s = m_s.getMatrix(0, m_actualRank - 1, 0, m_actualRank - 1);
    m_v = m_v.getMatrix(0, m_v.getRowDimension() - 1, 0, m_actualRank - 1);
    m_transformationMatrix = m_u.times(m_s.inverse());
    
    //create dataset header for transformed instances
    m_transformedFormat = setOutputFormat();
  }
  
  /**
   * Set the format for the transformed data
   * @return a set of empty Instances (header only) in the new format
   */
  private Instances setOutputFormat() {
    // if analysis hasn't been performed (successfully) yet
    if (m_s == null) {
      return null;
    }
    
    // set up transformed attributes
    if (m_hasClass) {
      m_outputNumAttributes = m_actualRank + 1;
    } else {
      m_outputNumAttributes = m_actualRank;
    }
    int numAttributesInName = m_maxAttributesInName;
    if (numAttributesInName <= 0 || numAttributesInName >= m_numAttributes) {
      numAttributesInName = m_numAttributes;
    }
    FastVector attributes = new FastVector(m_outputNumAttributes);
    for (int i = 0; i < m_actualRank; i++) {
      // create attribute name
      String attributeName = "";
      double [] attributeCoefficients = 
        m_transformationMatrix.getMatrix(0, m_numAttributes - 1, i, i).getColumnPackedCopy();
      for (int j = 0; j < numAttributesInName; j++) {
        if (j > 0) {
          attributeName += "+";
        }
        attributeName += Utils.doubleToString(attributeCoefficients[j], 5, 3);
        attributeName += m_trainInstances.attribute(j).name();
      }
      if (numAttributesInName < m_numAttributes) {
        attributeName += "...";
      }
      // add attribute
      attributes.addElement(new Attribute(attributeName));
    }
    // add original class attribute if present
    if (m_hasClass) {
      attributes.addElement(m_trainHeader.classAttribute().copy());
    }
    // create blank header
    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_LSA", 
        attributes, 0);
    m_outputNumAttributes = outputFormat.numAttributes();
    // set class attribute if applicable
    if (m_hasClass) {
      outputFormat.setClassIndex(m_outputNumAttributes - 1);
    }
    
    return outputFormat;
  }
  
  /**
   * Returns just the header for the transformed data (ie. an empty
   * set of instances. This is so that AttributeSelection can
   * determine the structure of the transformed data without actually
   * having to get all the transformed data through getTransformedData().
   * @return the header of the transformed data.
   * @throws Exception if the header of the transformed data can't
   * be determined.
   */
  public Instances transformedHeader() throws Exception {
    if (m_s == null) {
      throw new Exception("Latent Semantic Analysis hasn't been successfully performed.");
    }
    return m_transformedFormat;
  }
  
  /**
   * Transform the supplied data set (assumed to be the same format
   * as the training data)
   * @return the transformed training data
   * @throws Exception if transformed data can't be returned
   */
  public Instances transformedData(Instances data) throws Exception {
    if (m_s == null) {
      throw new Exception("Latent Semantic Analysis hasn't been built yet");
    }
    
    Instances output = new Instances(m_transformedFormat, m_numInstances);
    
    // the transformed version of instance i from the training data
    // is stored as the i'th row vector in v (the right singular vectors)
    for (int i = 0; i < data.numInstances(); i++) {
      Instance currentInstance = data.instance(i);
      // record attribute values for converted instance
      double [] newValues = new double[m_outputNumAttributes];
      for (int j = 0; j < m_actualRank; j++) { // fill in values from v
        newValues[j] = m_v.get(i, j);
      }
      if (m_hasClass) { // copy class value if applicable
        newValues[m_outputNumAttributes - 1] = currentInstance.classValue();
      }
      //create new instance with recorded values and add to output dataset
      Instance newInstance;
      if (currentInstance instanceof SparseInstance) {
        newInstance = new SparseInstance(currentInstance.weight(), newValues);
      } else {
        newInstance = new DenseInstance(currentInstance.weight(), newValues);
      }
      output.add(newInstance);
    }
    
    return output;
  }
  
  /**
   * Evaluates the merit of a transformed attribute. This is defined
   * to be the square of the singular value for the latent variable 
   * corresponding to the transformed attribute.
   * @param att the attribute to be evaluated
   * @return the merit of a transformed attribute
   * @throws Exception if attribute can't be evaluated
   */
  public double evaluateAttribute(int att) throws Exception {
    if (m_s == null) {
      throw new Exception("Latent Semantic Analysis hasn't been successfully" +
                            " performed yet!");
    }
    
    //return the square of the corresponding singular value
    return (m_s.get(att, att) * m_s.get(att, att)) / m_sumSquaredSingularValues;
  }
  
  /**
   * Transform an instance in original (unnormalized) format
   * @param instance an instance in the original (unnormalized) format
   * @return a transformed instance
   * @throws Exception if instance can't be transformed
   */
  public Instance convertInstance(Instance instance) throws Exception {
    if (m_s == null) {
      throw new Exception("convertInstance: Latent Semantic Analysis not " +
                           "performed yet.");
    }
    
    // array to hold new attribute values
    double [] newValues = new double[m_outputNumAttributes];
    
    // apply filters so new instance is in same format as training instances
    Instance tempInstance = (Instance)instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
      throw new Exception("Can't convert instance: headers don't match: " +
      "LatentSemanticAnalysis\n" + instance.dataset().equalHeadersMsg(m_trainHeader));
    }
    // replace missing values
    m_replaceMissingFilter.input(tempInstance);
    m_replaceMissingFilter.batchFinished();
    tempInstance = m_replaceMissingFilter.output();
    // normalize
    if (m_normalize) {
      m_normalizeFilter.input(tempInstance);
      m_normalizeFilter.batchFinished();
      tempInstance = m_normalizeFilter.output();
    }
    // convert nominal attributes to binary
    m_nominalToBinaryFilter.input(tempInstance);
    m_nominalToBinaryFilter.batchFinished();
    tempInstance = m_nominalToBinaryFilter.output();
    // remove class/other attributes
    if (m_attributeFilter != null) {
      m_attributeFilter.input(tempInstance);
      m_attributeFilter.batchFinished();
      tempInstance = m_attributeFilter.output();
    }
    
    // record new attribute values
    if (m_hasClass) { // copy class value
      newValues[m_outputNumAttributes - 1] = instance.classValue();
    }
    double [][] oldInstanceValues = new double[1][m_numAttributes];
    oldInstanceValues[0] = tempInstance.toDoubleArray();
    Matrix instanceVector = new Matrix(oldInstanceValues); // old attribute values
    instanceVector = instanceVector.times(m_transformationMatrix); // new attribute values
    for (int i = 0; i < m_actualRank; i++) {
      newValues[i] = instanceVector.get(0, i);
    }
    
    // return newly transformed instance
    if (instance instanceof SparseInstance) {
      return new SparseInstance(instance.weight(), newValues);
    } else {
      return new DenseInstance(instance.weight(), newValues);
    }
  }
  
  /**
   * Returns a description of this attribute transformer
   * @return a String describing this attribute transformer
   */
  public String toString() {
    if (m_s == null) {
      return "Latent Semantic Analysis hasn't been built yet!";
    } else {
      return "\tLatent Semantic Analysis Attribute Transformer\n\n"
      + lsaSummary();
    }
  }
  
  /**
   * Return a summary of the analysis
   * @return a summary of the analysis.
   */
  private String lsaSummary() {
    StringBuffer result = new StringBuffer();
    
    // print number of latent variables used
    result.append("Number of latent variables utilized: " + m_actualRank);
    
    // print singular values
    result.append("\n\nSingularValue\tLatentVariable#\n");
    // create single array of singular values rather than diagonal matrix
    for (int i = 0; i < m_actualRank; i++) {
      result.append(Utils.doubleToString(m_s.get(i, i), 9, 5) + "\t" + (i + 1) + "\n");
    }
    
    // print attribute vectors
    result.append("\nAttribute vectors (left singular vectors) -- row vectors show\n" +
                  "the relation between the original attributes and the latent \n" +
                  "variables computed by the singular value decomposition:\n");
    for (int i = 0; i < m_actualRank; i++) {
      result.append("LatentVariable#" + (i + 1) + "\t");
    }
    result.append("AttributeName\n");
    for (int i = 0; i < m_u.getRowDimension(); i++) { // for each attribute
      for (int j = 0; j < m_u.getColumnDimension(); j++) { // for each latent variable
        result.append(Utils.doubleToString(m_u.get(i, j), 9, 5) + "\t\t");
      }
      result.append(m_trainInstances.attribute(i).name() + "\n");
    }
    
    // print instance vectors
    result.append("\n\nInstance vectors (right singular vectors) -- column\n" +
                  "vectors show the relation between the original instances and the\n" +
                  "latent variables computed by the singular value decomposition:\n");
    for (int i = 0; i < m_numInstances; i++) {
      result.append("Instance#" + (i + 1) + "\t");
    }
    result.append("LatentVariable#\n");
    for (int i = 0; i < m_v.getColumnDimension(); i++) { // for each instance
      for (int j = 0; j < m_v.getRowDimension(); j++) { // for each latent variable
        // going down columns instead of across rows because we're
        // printing v' but have v stored
        result.append(Utils.doubleToString(m_v.get(j, i), 9, 5) + "\t");
      }
      result.append((i + 1) + "\n");
    }
    
    return result.toString();
  }
  
  /**
   * Returns the revision string.
   * 
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8108 $");
  }
  
  /**
   * Main method for testing this class
   * @param argv should contain the command line arguments to the
   * evaluator/transformer (see AttributeSelection)
   */
  public static void main(String [] argv) {
    runEvaluator(new LatentSemanticAnalysis(), argv);
  }
}