All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.distributed.CorrelationMatrixMapTask Maven / Gradle / Ivy

Go to download

This package provides a generic configuration class and distributed map/reduce-style tasks for Weka

There is a newer version: 1.0.9
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CorrelationMatrixMapTask.java
 *    Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.distributed;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.stats.ArffSummaryNumericMetric;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * A map task that computes partial covariance sums for a covariance/correlation
 * matrix from the data it gets via its processInstance() method. Expects to be
 * initialized with a training header that includes summary meta attributes. Can
 * replace missing values with means or omit them from the updates.
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 11018 $
 */
public class CorrelationMatrixMapTask implements Serializable, OptionHandler {

  /** For serialization */
  private static final long serialVersionUID = 3437000574208204515L;

  /** Final result is covariance rather than correlation? */
  protected boolean m_covariance;

  /** Whether to replace missing values with the mean or just ignore them */
  protected boolean m_replaceMissingWithMean = true;

  /** Holds the version of the header that contains the summary meta attributes */
  protected Instances m_headerWithSummary;

  /** The header without the summary meta attributes */
  protected Instances m_header;

  /**
   * Whether to delete the class attribute if set in the data (so that if it is
   * numeric it doesn't become part of the correlation matrix)
   */
  protected boolean m_deleteClassIfSet = true;

  /** Remove filter for removing attributes */
  protected Remove m_remove;

  /** Holds the partial covariance sums matrix */
  protected double[][] m_corrMatrix;

  /**
   * Co-occurrence counts when ignoring missing values rather than replacing
   * them with means
   */
  protected int[][] m_coOccurrenceCounts;

  /** Holds the mean for each numeric attribute */
  protected double[] m_means;

  @Override
  public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy