net.maizegenetics.analysis.distance.Kinship Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
package net.maizegenetics.analysis.distance;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.score.ReferenceProbability;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrix;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrixFactory;
import net.maizegenetics.dna.snp.GenotypeTable.GENOTYPE_TABLE_COMPONENT;
import net.maizegenetics.taxa.distance.DistanceMatrix;
import java.util.function.BiConsumer;
import java.util.stream.IntStream;
/**
* @author Terry Casstevens
* @author Zhiwu Zhang
* @author Peter Bradbury
*
* @deprecated This utility has been replaced. See KinshipPlugin. Please discuss
* with Terry if something needs to be salvaged from this class.
*/
public class Kinship {
//scale the numeric matrix produced by the transform function or from probabilities which code phenotypes as {1,0.5,0}
public static double MATRIX_MULTIPLIER = 2;
public static enum KINSHIP_TYPE {
Endelman
};
private Kinship() {
// utility to create kinship matrix
}
/**
* @deprecated Replaced by {@link EndelmanDistanceMatrix#getInstance(net.maizegenetics.dna.snp.GenotypeTable, int, net.maizegenetics.util.ProgressListener)
* }
*/
private static DistanceMatrix createKinship(GenotypeTable mar, KINSHIP_TYPE kinshipType, GENOTYPE_TABLE_COMPONENT dataType) {
System.out.println("Starting Kinship.buildFromMarker().");
long start = System.currentTimeMillis();
DistanceMatrix result;
if (kinshipType == KINSHIP_TYPE.Endelman) {
result = buildFromMarker(mar, kinshipType, dataType);
} else {
throw new IllegalArgumentException("Kinship: createKinship: unknown kinship algorithm: " + kinshipType);
}
System.out.printf("Built Kinship in %d millisec.\n", System.currentTimeMillis() - start);
return result;
}
private static DistanceMatrix buildFromMarker(GenotypeTable mar, KINSHIP_TYPE kinshipType, GENOTYPE_TABLE_COMPONENT dataType) {
if (dataType == GENOTYPE_TABLE_COMPONENT.Genotype) {
return calculateKinshipFromMarkers(mar);
} else if (dataType == GENOTYPE_TABLE_COMPONENT.ReferenceProbability) {
return calculateRelationshipKinshipFromReferenceProbability(mar);
} else {
throw new IllegalArgumentException("The supplied data type is not currently supported by the Kinship method: " + dataType);
}
}
/**
* Calculates a kinship matrix from genotypes using the method described in
* Endelman and Jannink (2012) G3 2:1407-1413. It is best to impute missing
* data before calculating. However, if data is missing it is replaced by
* the allele average at that site.
*
*/
private static DistanceMatrix calculateKinshipFromMarkers(GenotypeTable mar) {
//mar is the input genotype table
byte missingAllele = GenotypeTable.UNKNOWN_ALLELE;
// from Endelman and Jannink. 2012. G3 2:1407ff
// A = WW'/[2*sumk(pk*qk)]
// where W = centered genotype matrix (centered on marker mean value, marker coded as 2,1,0)
// where marker is multi-allelic, leave one allele out to keep markers independent
int ntaxa = mar.numberOfTaxa();
int nsites = mar.numberOfSites();
double[][] distance = new double[ntaxa][ntaxa];
double sumpi = 0;
//calculate WW' by summing ww' for each allele, where w is a column vector of centered allele counts {2,1,0}
for (int s = 0; s < nsites; s++) {
int[][] alleleFreq = mar.allelesSortedByFrequency(s);
int nalleles = alleleFreq[0].length;
int totalAlleleCount = mar.totalGametesNonMissingForSite(s);
for (int a = 0; a < nalleles - 1; a++) {
double pi = ((double) alleleFreq[1][a]) / ((double) totalAlleleCount);
double pix2 = 2 * pi;
sumpi += pi * (1 - pi);
double[] scores = new double[ntaxa];
for (int t = 0; t < ntaxa; t++) {
byte[] geno = GenotypeTableUtils.getDiploidValues(mar.genotype(t, s));
double thisScore = 0;
if (geno[0] != missingAllele) {
if (geno[0] == alleleFreq[0][a]) {
thisScore++;
}
if (geno[1] == alleleFreq[0][a]) {
thisScore++;
}
thisScore -= pix2;
}
scores[t] = thisScore;
}
for (int r = 0; r < ntaxa; r++) {
double rowval = scores[r];
distance[r][r] += rowval * rowval;
for (int c = r + 1; c < ntaxa; c++) {
distance[r][c] += rowval * scores[c];
}
}
}
}
double sumpk = 2 * sumpi;
for (int r = 0; r < ntaxa; r++) {
distance[r][r] /= sumpk;
for (int c = r + 1; c < ntaxa; c++) {
distance[r][c] = distance[c][r] = distance[r][c] / sumpk;
}
}
return new DistanceMatrix(distance, mar.taxa());
}
/**
* Calculates a kinship matrix from genotypes using the method described in
* Endelman and Jannink (2012) G3 2:1407-1413. It is best to impute missing
* data before calculating. However, if data is missing it is replaced by
* the allele average at that site.
*
*/
private static DistanceMatrix calculateKinshipFromMarkersV2(GenotypeTable mar) {
//mar is the input genotype table
byte missingAllele = GenotypeTable.UNKNOWN_ALLELE;
// from Endelman and Jannink. 2012. G3 2:1407ff
// A = WW'/[2*sumk(pk*qk)]
// where W = centered genotype matrix (centered on marker mean value, marker coded as 2,1,0)
// where marker is multi-allelic, leave one allele out to keep markers independent
int ntaxa = mar.numberOfTaxa();
int nsites = mar.numberOfSites();
double[][] distance = new double[ntaxa][ntaxa];
double sumpq = IntStream.range(0, nsites).parallel().mapToDouble(s -> {
int[][] alleleFreq = mar.allelesSortedByFrequency(s);
int nalleles = alleleFreq[0].length;
double totalAlleleCount = mar.totalGametesNonMissingForSite(s);
double pq = 0;
for (int a = 0; a < nalleles - 1; a++) {
double p = alleleFreq[1][a] / totalAlleleCount;
pq += p * (1 - p);
}
return pq;
}).sum();
//calculate WW' by summing ww' for each allele, where w is a column vector of centered allele counts {2,1,0}
BiConsumer siteDistance = (dist, site) -> {
int s = site.intValue();
int[][] alleleFreq = mar.allelesSortedByFrequency(s);
int nalleles = alleleFreq[0].length;
int totalAlleleCount = mar.totalGametesNonMissingForSite(s);
for (int a = 0; a < nalleles - 1; a++) {
double pi = ((double) alleleFreq[1][a]) / ((double) totalAlleleCount);
double pix2 = 2 * pi;
double[] scores = new double[ntaxa];
for (int t = 0; t < ntaxa; t++) {
byte[] geno = GenotypeTableUtils.getDiploidValues(mar.genotype(t, s));
double thisScore = 0;
if (geno[0] != missingAllele) {
if (geno[0] == alleleFreq[0][a]) {
thisScore++;
}
if (geno[1] == alleleFreq[0][a]) {
thisScore++;
}
thisScore -= pix2;
}
scores[t] = thisScore;
}
for (int r = 0; r < ntaxa; r++) {
double rowval = scores[r];
dist[r][r] += rowval * rowval;
for (int c = r + 1; c < ntaxa; c++) {
dist[r][c] += rowval * scores[c];
}
}
}
};
BiConsumer mergeDistance = (d1, d2) -> {
for (int r = 0; r < ntaxa; r++) {
double[] row1 = d1[r];
double[] row2 = d2[r];
for (int c = 0; c < ntaxa; c++) {
row1[c] += row2[c];
}
}
};
distance = IntStream.range(0, nsites).boxed().parallel().collect(() -> new double[ntaxa][ntaxa], siteDistance, mergeDistance);
double sumpk = 2 * sumpq;
for (int r = 0; r < ntaxa; r++) {
distance[r][r] /= sumpk;
for (int c = r + 1; c < ntaxa; c++) {
distance[r][c] = distance[c][r] = distance[r][c] / sumpk;
}
}
return new DistanceMatrix(distance, mar.taxa());
}
private static DistanceMatrix calculateRelationshipKinshipFromReferenceProbability(GenotypeTable mar) {
ReferenceProbability referenceP = mar.referenceProbability();
//calculate the column averages and sumpq, center W
int nrow = referenceP.numTaxa();
int ncol = referenceP.numSites();
double[][] W = new double[nrow][ncol];
for (int r = 0; r < nrow; r++) {
for (int c = 0; c < ncol; c++) {
W[r][c] = referenceP.value(r, c) * MATRIX_MULTIPLIER;
}
}
double sumpq = 0;
for (int c = 0; c < ncol; c++) {
double colTotal = 0;
int colCount = 0;
for (int r = 0; r < nrow; r++) {
if (!Double.isNaN(W[r][c])) {
colTotal += W[r][c];
colCount++;
}
}
double pi = colTotal / colCount / 2.0;
double pix2 = pi * 2;
sumpq += pi * (1 - pi);
for (int r = 0; r < nrow; r++) {
if (Double.isNaN(W[r][c])) {
W[r][c] = 0;
} else {
W[r][c] -= pix2;
}
}
}
DoubleMatrix WWt = DoubleMatrixFactory.DEFAULT.make(W).tcrossproduct();
double[][] scaledIBS = new double[nrow][nrow];
for (int r = 0; r < nrow; r++) {
for (int c = 0; c < nrow; c++) {
scaledIBS[r][c] = WWt.get(r, c) / sumpq / 2;
}
}
return new DistanceMatrix(scaledIBS, mar.taxa());
}
private static DistanceMatrix calculateRelationshipKinshipFromAlleleProbabilities() {
//TODO implement
return null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy