
umcg.genetica.math.stats.Correlation Maven / Gradle / Ivy
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package umcg.genetica.math.stats;
import cern.jet.stat.tdouble.Probability;
import umcg.genetica.containers.Pair;
import umcg.genetica.util.RankDoubleArray;
/**
*
* @author harmjan
*/
public class Correlation {
public static double[][] m_correlationToZScore;
public static double rankCorrelate(double[] x, double[] y) {
RankDoubleArray rda = new RankDoubleArray();
if (x == null || y == null) {
throw new IllegalArgumentException("Error calculating correlation: x or y is null!");
} else if (y.length != x.length) {
throw new IllegalArgumentException("Error calculating correlation: x and y should have the same length!");
}
double[] xNew = new double[x.length];
double[] yNew = new double[y.length];
for (int a = 0; a < x.length; a++) {
xNew[a] = x[a];
yNew[a] = y[a];
}
double[] xranked = rda.rank(xNew);
double[] yranked = rda.rank(yNew);
return correlate(xranked, yranked);
}
public static void correlationToZScore(int maxNrSamples) {
//Fast look-up service to determine P-Value for individual Spearman correlation coefficient, given sample size:
if (m_correlationToZScore == null || m_correlationToZScore.length < maxNrSamples + 1) {
// System.out.println("Creating new LookupTable: "+maxNrSamples);
// if(m_correlationToZScore != null){
// System.out.println("Length: "+m_correlationToZScore.length+"\treq: "+maxNrSamples);
// }
double[][] correlationToZScore = new double[maxNrSamples + 1][2001];
cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = new cern.jet.random.tdouble.engine.DRand();
for (int nrSamples = 0; nrSamples <= maxNrSamples; nrSamples++) {
if (nrSamples < 3) {
for (int sc = 0; sc <= 2000; sc++) {
correlationToZScore[nrSamples][sc] = 0;
}
} else {
cern.jet.random.tdouble.StudentT tDistColt = new cern.jet.random.tdouble.StudentT(nrSamples - 2, randomEngine);
//JSci.maths.statistics.TDistribution tDist = new JSci.maths.statistics.TDistribution(nrSamples - 2);
for (int s = 0; s <= 2000; s++) {
//Determine Spearman correlation R:
double spearman = (double) (s - 1000) / 1000d;
if (Math.abs(spearman - -1.0d) < .0000001) {
spearman = -0.9999;
}
if (Math.abs(spearman - 1.0d) < .0000001) {
spearman = +0.9999;
}
//Calculate T score:
double t = spearman / (Math.sqrt((1 - spearman * spearman) / (nrSamples - 2)));
//Look up P value, avoid complementation, due to Double inaccuracies.
//And lookup Z score, and avoid complementation here as well.
double pValueColt = 1;
double zScoreColt = 0;
if (t < 0) {
pValueColt = tDistColt.cdf(t);
if (pValueColt < 2.0E-323) {
pValueColt = 2.0E-323;
}
zScoreColt = Probability.normalInverse(pValueColt);
//zScoreColt = normDist.inverse(pValueColt);
} else {
pValueColt = tDistColt.cdf(-t);
if (pValueColt < 2.0E-323) {
pValueColt = 2.0E-323;
}
zScoreColt = -Probability.normalInverse(pValueColt);
//zScoreColt = -normDist.inverse(pValueColt);
}
//Store Z score:
correlationToZScore[nrSamples][s] = zScoreColt;
}
}
}
m_correlationToZScore = correlationToZScore;
}
}
//Requires mean of 0
public static double correlateMeanCenteredData(double[] x, double[] y, double varX, double varY) {
double covarianceInterim = 0;
for (int i = 0; i < x.length; i++) {
covarianceInterim += x[i] * y[i];
}
return (covarianceInterim / (x.length - 1)) / Math.sqrt(varX * varY);
}
//Requires mean of 0
public static double correlateMeanCenteredData(double[] x, double[] y, double sdXsdY) {
double covarianceInterim = 0;
for (int i = 0; i < x.length; i++) {
covarianceInterim += x[i] * y[i];
}
return (covarianceInterim / (x.length - 1)) / (sdXsdY);
}
// public static double correlateMeanCenteredData(double[] x, double[] y) {
//
// double[] xNew = new double[x.length];
// double[] yNew = new double[y.length];
// for (int a=0; a tmpMean = Descriptives.mean(x, y);
double meanX = tmpMean.getLeft();
double meanY = tmpMean.getRight();
double varX = 0;
double varY = 0;
double covarianceInterim = 0;
for (int a = 0; a < x.length; a++) {
double varXT = (x[a] - meanX);
double varYT = (y[a] - meanY);
covarianceInterim += varXT * varYT;
varX += varXT * varXT;
varY += varYT * varYT;
}
varY = varY / (y.length - 1);
varX = varX / (x.length - 1);
double denominator = Math.sqrt(varX * varY);
double covariance = covarianceInterim / (x.length - 1);
double correlation = covariance / denominator;
return correlation;
} else {
System.out.println("Warning two arrays of non identical length are put in for correlation.");
System.out.println("Returning NaN");
return (Double.NaN);
}
}
/**
* Fast correlation of two double[] with their mean values
*
* @param x
* @param y
* @param meanX
* @param meanY
* @return
*/
public static double correlate(double meanX, double meanY, double[] x, double[] y) {
if (x.length == y.length) {
double varX = 0;
double varY = 0;
double covarianceInterim = 0;
for (int a = 0; a < x.length; a++) {
double varXT = (x[a] - meanX);
double varYT = (y[a] - meanY);
covarianceInterim += varXT * varYT;
varX += varXT * varXT;
varY += varYT * varYT;
}
varY = varY / (y.length - 1);
varX = varX / (x.length - 1);
double denominator = Math.sqrt(varX * varY);
double covariance = covarianceInterim / (x.length - 1);
double correlation = covariance / denominator;
return correlation;
} else {
System.out.println("Warning two arrays of non identical length are put in for correlation.");
System.out.println("Returning NaN");
return (Double.NaN);
}
}
/**
* Fast covariation of two double[] ToDo change correlateMeanCenteredData code to covariate
* code! ToDo add test
*
* @param x
* @param y
* @return
*/
public static double covariate(double[] x, double[] y) {
if (x.length == y.length) {
Pair tmpMean = Descriptives.mean(x, y);
double meanX = tmpMean.getLeft();
double meanY = tmpMean.getRight();
double covarianceInterim = 0;
for (int a = 0; a < x.length; a++) {
double varXT = (x[a] - meanX);
double varYT = (y[a] - meanY);
covarianceInterim += varXT * varYT;
}
double covariance = covarianceInterim / (x.length - 1);
return covariance;
} else {
System.out.println("Warning two arrays of non identical length are put in for correlation.");
System.out.println("Returning NaN");
return (Double.NaN);
}
}
/**
* Fast covariation of two double[] with their mean values ToDo change
* correlateMeanCenteredData code to covariate code! ToDo add test
*
* @param x
* @param y
* @param meanX
* @param meanY
* @return
*/
public static double covariate(double meanX, double meanY, double[] x, double[] y) {
if (x.length == y.length) {
double covarianceInterim = 0;
for (int a = 0; a < x.length; a++) {
double varXT = (x[a] - meanX);
double varYT = (y[a] - meanY);
covarianceInterim += varXT * varYT;
}
double covariance = covarianceInterim / (x.length - 1);
return covariance;
} else {
System.out.println("Warning two arrays of non identical length are put in for correlation.");
System.out.println("Returning NaN");
return (Double.NaN);
}
}
public static double convertCorrelationToZScore(int length, double correlation) {
int correlationIndex = (int) Math.round(((correlation + 1.0d) * 1000d));
if (m_correlationToZScore == null) {
throw new IllegalArgumentException("Correlation to ZScore lookup table is not initialized.");
} else {
if (m_correlationToZScore.length < length) {
throw new IllegalArgumentException("Correlation to ZScore lookup table is not initialized properly. Expected: " + length + ". Actual length: " + m_correlationToZScore.length);
} else if (correlationIndex > m_correlationToZScore[length].length - 1) {
throw new IllegalArgumentException("ERROR! correlation: " + correlation + " does not fit Z-score table for " + m_correlationToZScore[length] + " samples (length is: " + m_correlationToZScore[length].length + ")");
}
return m_correlationToZScore[length][correlationIndex];
}
}
public static double correlate(double[] a1, double[] a2, double mean1, double mean2, double var1, double var2) {
double denom = Math.sqrt(var1 * var2);
if (denom != 0) {
if (a1.length != a2.length) {
throw new IllegalArgumentException("Arrays must have the same length : " + a1.length + ", " + a2.length);
}
double ans = 0.0;
for (int i = 0; i < a1.length; i++) {
ans += (a1[i] - mean1) * (a2[i] - mean2);
}
return ans / (a1.length - 1) / denom;
} else {
if (var1 == 0 && var2 == 0) {
return (1.0);
} else {
return (0.0); // impossible to correlateMeanCenteredData a null signal with another
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy