All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.stats.statistics.ContigencyTable Maven / Gradle / Ivy

// ContigencyTable.java
//
// (c) 1999-2001 PAL Development Core Team
//
// This package may be distributed under the
// terms of the Lesser GNU General Public License (LGPL)
package net.maizegenetics.stats.statistics;

import net.maizegenetics.stats.math.MersenneTwisterFast;

/**
 * Class for permuting contigency tables and determining the likelihood of the table.
 * If determining of the probability of a 2x2 table use FisherExact as it is much
 * faster.
 *
 * @version $Id: ContigencyTable.java,v 1
 *
 * @author Ed Buckler
 */
public class ContigencyTable {

MersenneTwisterFast intRand;
int[][] contig;
int csum, rows, cols;
int[] crow,ccol, rowDist, colDist;
float[][] expectation;
private double[] f; //this holds a large series of factorials
double mcF; //this has all the marginal factorials for calculating stats
int maxSize;


  /**
   * constructor for Contigency table
   *
   * @param maxSize is the maximum sum that will be encountered by contigency table
   */
  public ContigencyTable(int maxSize) {
    intRand=new MersenneTwisterFast();
    this.maxSize=2*maxSize;
    f=new double[this.maxSize+1];
    f[0]=0.0;
    for(int i=1; i<=this.maxSize; i++)
      {f[i]=f[i-1]+Math.log(i);}
  }


  /**
   * sets the data for the contigency table, must be set before other methods are called.
   * If tcontig has a greater count than maxSize, then the contig is set to null
   *
   * @param contig is the array of integers with observed states
   */
public void setMatrix(int[][] tcontig)
  {  //this permutes of rowDist to rapidly do the permutations
  int i,j,k,count=0;
  rows=tcontig.length;
  cols=tcontig[0].length;
  contig=tcontig;
  csum=0;
  crow=new int[rows];
  ccol=new int[cols];
  for(i=0; i1000) {
     // System.out.println("Holly Crap! Stop:"+csum);

  }
  if(csum>maxSize)
      {contig=null;
      return;}
  rowDist=new int[csum];     //This sets up the row distribution so that the random number requires only one call
  colDist=new int[csum];
  for(i=0; i0)    //this will calculate the multiple used in calculating the fisher exact tests
    {mcF=0;
    for(i=0; i0)
                  {E=expectation[i][j];
                  chi+=(((float)contig[i][j]-E)*((float)contig[i][j]-E)/E);
                  }
              }
          }
//	printf("The chi=%8.4f\n",chi);
	return chi;
}

final double calcLnFisherExactP()    //the natural log of the fisher exact P
{
	int i,j;
	double lnFisherExactP=mcF;

	for(i=0; i0) {randomcontig();}
	  X=calcchiSquare();
	  if(r==0)
	    {Origchi=X;}
	  else
	    {if(X>=Origchi) {chiprob+=1.0;}}
          if(chiprob>9.0) {reps=r-1;}
	  }
  return chiprob/(double)r;
}

  /**
   * This calculates the probability in the normal approach, using the Chi Square as the test statistic.
   *
   * @param permutations Number of permutations used to calculate the probability
   * @return P-value (NaN is returned if bad contig was set)
   */
public double calcContigencyChiSquare (int permutations)
{//this version is slow, but does fixed number of reps; better if you want the distribution of P
	int r;
	double X, chiprob=0.0f, Origchi=0.0f;

        if(contig==null) {return Double.NaN;}

	for(r=0; r<=permutations; r++)
	  {if (r>0) {randomcontig();}
	  X=calcchiSquare();
	  if(r==0)
	    {Origchi=X;}
	  else
	    {if(X>=Origchi) {chiprob+=1.0;}}
	  }
  return chiprob/(double)r;
}

  /**
   * This calculates the probability in the rapid permutational approach, using the method described
   * by Weir, B. S. (1996) Genetic Data Analysis II (Sinauer, Sunderland, MA)
   * It runs for 1000 permutations unless it find 10 values that beat the observed, and
   * then it stops and calculates the p-value.  This slighly biases the P-values but makes it much more rapid.
   *
   * @param reps is the number of permutations used to the probability
   * @return P-value (NaN is returned if bad contig was set)
   */
public double calcRapidMonteCarloExactTest(int maxPermutations)
    //this is the weir based approach
    //this version is rapid because it has a cut off if 10 reps beat the observed
//then it quits, this is good for rapid analysis and when you are mostly interested in the low p-values
{
	int r,reps=maxPermutations;
	double fE, mCFEprob=0.0f, origMCFE=0.0f;

        if(contig==null) {return Double.NaN;}

	for(r=0; r<=reps; r++)
		{if (r>0) {randomcontig();}
		fE=calcLnFisherExactP();
	if(r==0)
	  {origMCFE=fE;}
		  else
		  	{if(fE<=origMCFE)	{mCFEprob+=1.0;}
		  	}
    if(mCFEprob>9.0)
      {reps=r-1;}
		}
  return mCFEprob/(double)r;
}

  /**
   * This calculates the probability in the normal permutation approach, using the method described
   * by Weir, B. S. (1996) Genetic Data Analysis II (Sinauer, Sunderland, MA).
   *
   * @param permutations Number of permutations used to the probability
   * @return P-value (NaN is returned if bad contig was set)
   */
public double calcMonteCarloExactTest(int permutations)
    //this is the weir based approach
    //this version is slow, but does fixed number of reps; better if you want the distribution of P
{
	int r;
	double fE, mCFEprob=0.0f, origMCFE=0.0f;

        if(contig==null) {return Double.NaN;}

	for(r=0; r<=permutations; r++)
	  {if (r>0) {randomcontig();}
	  fE=calcLnFisherExactP();
    	if(r==0)
    	  {origMCFE=fE;}
    	else
    	  {if(fE<=origMCFE)	{mCFEprob+=1.0;}}
	}
  return mCFEprob/(double)r;
}


  void writeMatrix() {
    for(int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy