All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math3.stat.correlation.SpearmansCorrelation Maven / Gradle / Ivy

There is a newer version: 2.12.15
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.stat.correlation;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.linear.BlockRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.ranking.NaNStrategy;
import org.apache.commons.math3.stat.ranking.NaturalRanking;
import org.apache.commons.math3.stat.ranking.RankingAlgorithm;

/**
 * Spearman's rank correlation. This implementation performs a rank
 * transformation on the input data and then computes {@link PearsonsCorrelation}
 * on the ranked data.
 * 

* By default, ranks are computed using {@link NaturalRanking} with default * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged). * The ranking algorithm can be set using a constructor argument. * * @since 2.0 */ public class SpearmansCorrelation { /** Input data */ private final RealMatrix data; /** Ranking algorithm */ private final RankingAlgorithm rankingAlgorithm; /** Rank correlation */ private final PearsonsCorrelation rankCorrelation; /** * Create a SpearmansCorrelation without data. */ public SpearmansCorrelation() { this(new NaturalRanking()); } /** * Create a SpearmansCorrelation with the given ranking algorithm. *

* From version 4.0 onwards this constructor will throw an exception * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy. * * @param rankingAlgorithm ranking algorithm * @since 3.1 */ public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm) { data = null; this.rankingAlgorithm = rankingAlgorithm; rankCorrelation = null; } /** * Create a SpearmansCorrelation from the given data matrix. * * @param dataMatrix matrix of data with columns representing * variables to correlate */ public SpearmansCorrelation(final RealMatrix dataMatrix) { this(dataMatrix, new NaturalRanking()); } /** * Create a SpearmansCorrelation with the given input data matrix * and ranking algorithm. *

* From version 4.0 onwards this constructor will throw an exception * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy. * * @param dataMatrix matrix of data with columns representing * variables to correlate * @param rankingAlgorithm ranking algorithm */ public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) { this.rankingAlgorithm = rankingAlgorithm; this.data = rankTransform(dataMatrix); rankCorrelation = new PearsonsCorrelation(data); } /** * Calculate the Spearman Rank Correlation Matrix. * * @return Spearman Rank Correlation Matrix * @throws NullPointerException if this instance was created with no data */ public RealMatrix getCorrelationMatrix() { return rankCorrelation.getCorrelationMatrix(); } /** * Returns a {@link PearsonsCorrelation} instance constructed from the * ranked input data. That is, * new SpearmansCorrelation(matrix).getRankCorrelation() * is equivalent to * new PearsonsCorrelation(rankTransform(matrix)) where * rankTransform(matrix) is the result of applying the * configured RankingAlgorithm to each of the columns of * matrix. * *

Returns null if this instance was created with no data.

* * @return PearsonsCorrelation among ranked column data */ public PearsonsCorrelation getRankCorrelation() { return rankCorrelation; } /** * Computes the Spearman's rank correlation matrix for the columns of the * input matrix. * * @param matrix matrix with columns representing variables to correlate * @return correlation matrix */ public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) { final RealMatrix matrixCopy = rankTransform(matrix); return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy); } /** * Computes the Spearman's rank correlation matrix for the columns of the * input rectangular array. The columns of the array represent values * of variables to be correlated. * * @param matrix matrix with columns representing variables to correlate * @return correlation matrix */ public RealMatrix computeCorrelationMatrix(final double[][] matrix) { return computeCorrelationMatrix(new BlockRealMatrix(matrix)); } /** * Computes the Spearman's rank correlation coefficient between the two arrays. * * @param xArray first data array * @param yArray second data array * @return Returns Spearman's rank correlation coefficient for the two arrays * @throws DimensionMismatchException if the arrays lengths do not match * @throws MathIllegalArgumentException if the array length is less than 2 */ public double correlation(final double[] xArray, final double[] yArray) { if (xArray.length != yArray.length) { throw new DimensionMismatchException(xArray.length, yArray.length); } else if (xArray.length < 2) { throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2); } else { double[] x = xArray; double[] y = yArray; if (rankingAlgorithm instanceof NaturalRanking && NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) { final Set nanPositions = new HashSet(); nanPositions.addAll(getNaNPositions(xArray)); nanPositions.addAll(getNaNPositions(yArray)); x = removeValues(xArray, nanPositions); y = removeValues(yArray, nanPositions); } return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(x), rankingAlgorithm.rank(y)); } } /** * Applies rank transform to each of the columns of matrix * using the current rankingAlgorithm. * * @param matrix matrix to transform * @return a rank-transformed matrix */ private RealMatrix rankTransform(final RealMatrix matrix) { RealMatrix transformed = null; if (rankingAlgorithm instanceof NaturalRanking && ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) { final Set nanPositions = new HashSet(); for (int i = 0; i < matrix.getColumnDimension(); i++) { nanPositions.addAll(getNaNPositions(matrix.getColumn(i))); } // if we have found NaN values, we have to update the matrix size if (!nanPositions.isEmpty()) { transformed = new BlockRealMatrix(matrix.getRowDimension() - nanPositions.size(), matrix.getColumnDimension()); for (int i = 0; i < transformed.getColumnDimension(); i++) { transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions)); } } } if (transformed == null) { transformed = matrix.copy(); } for (int i = 0; i < transformed.getColumnDimension(); i++) { transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i))); } return transformed; } /** * Returns a list containing the indices of NaN values in the input array. * * @param input the input array * @return a list of NaN positions in the input array */ private List getNaNPositions(final double[] input) { final List positions = new ArrayList(); for (int i = 0; i < input.length; i++) { if (Double.isNaN(input[i])) { positions.add(i); } } return positions; } /** * Removes all values from the input array at the specified indices. * * @param input the input array * @param indices a set containing the indices to be removed * @return the input array without the values at the specified indices */ private double[] removeValues(final double[] input, final Set indices) { if (indices.isEmpty()) { return input; } final double[] result = new double[input.length - indices.size()]; for (int i = 0, j = 0; i < input.length; i++) { if (!indices.contains(i)) { result[j++] = input[i]; } } return result; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy