All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.taxa.distance.DistanceMatrix Maven / Gradle / Ivy

// DistanceMatrix.java
//
// (c) 1999-2001 PAL Development Core Team
//
// This package may be distributed under the
// terms of the Lesser GNU General Public License (LGPL)
package net.maizegenetics.taxa.distance;

import net.maizegenetics.util.FormattedOutput;
import net.maizegenetics.util.TableReport;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.GeneralAnnotation;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;

/**
 * Storage for pairwise distance matrices. Only stores half the matrix as it is
 * symmetrical.

* * For best performance, iterate over matrix this way. *

 * DistanceMatrix matrix;
 * for (int i = 0; i < myNumTaxa; i++) {
 *     for (int j = 0; j <= i; j++) {
 *         matrix.getDistance(i, j);
 *     }
 * }
 * 
* * @author Korbinian Strimmer * @author Alexei Drummond * @author Terry Casstevens */ public class DistanceMatrix implements TableReport { private final TaxaList myTaxaList; private final int myNumTaxa; private final GeneralAnnotation myAnnotations; private final float[][] myDistances; /** * Use DistanceMatrixBuilder instead of this. * * @see DistanceMatrixBuilder */ DistanceMatrix(float[][] distances, TaxaList taxa, GeneralAnnotation annotations) { myDistances = distances; myTaxaList = taxa; myNumTaxa = myTaxaList.numberOfTaxa(); myAnnotations = annotations; } /** * Constructor taking distances array and taxa list. Use * DistanceMatrixBuilder instead of this. * * @see DistanceMatrixBuilder */ public DistanceMatrix(double[][] distance, TaxaList taxa) { this(distance, taxa, null); } /** * Use DistanceMatrixBuilder instead of this. * * @see DistanceMatrixBuilder */ public DistanceMatrix(double[][] distances, TaxaList taxa, GeneralAnnotation annotations) { myNumTaxa = taxa.numberOfTaxa(); if ((distances == null) || (distances.length != myNumTaxa) || (distances[0].length != myNumTaxa)) { throw new IllegalArgumentException("DistanceMatrix: init: dimensions of distances aren't correct."); } myDistances = new float[myNumTaxa][]; for (int i = 0; i < myNumTaxa; i++) { myDistances[i] = new float[i + 1]; } for (int x = 0; x < myNumTaxa; x++) { for (int y = 0; y <= x; y++) { if (Math.abs(distances[x][y] - distances[y][x]) > 0.0000001) { throw new IllegalStateException("DistanceMatrix: init: values passed in are not symmetrical: " + distances[x][y] + " and: " + distances[y][x]); } myDistances[x][y] = (float) distances[x][y]; } } myTaxaList = taxa; myAnnotations = annotations; } /** * Constructor that clones a distance matrix. */ public DistanceMatrix(DistanceMatrix dm) { myNumTaxa = dm.numberOfTaxa(); myDistances = new float[myNumTaxa][]; for (int i = 0; i < myNumTaxa; i++) { myDistances[i] = new float[i + 1]; } for (int x = 0; x < myNumTaxa; x++) { for (int y = 0; y <= x; y++) { myDistances[x][y] = dm.myDistances[x][y]; } } myTaxaList = dm.myTaxaList; myAnnotations = dm.myAnnotations; } /** * Constructor that clones a distance matrix and for only the specified * taxa. */ public DistanceMatrix(DistanceMatrix dm, TaxaList subset) { myNumTaxa = subset.numberOfTaxa(); myDistances = new float[myNumTaxa][]; for (int i = 0; i < myNumTaxa; i++) { myDistances[i] = new float[i + 1]; } for (int i = 0; i < myNumTaxa; i++) { int index1 = dm.whichIdNumber(subset.taxaName(i)); myDistances[i][i] = dm.myDistances[index1][index1]; for (int j = 0; j < i; j++) { int index2 = dm.whichIdNumber(subset.taxaName(j)); myDistances[i][j] = dm.getDistance(index1, index2); } } myTaxaList = subset; myAnnotations = dm.myAnnotations; } /** * print alignment (PHYLIP format) */ public void printPHYLIP(PrintWriter out) throws IOException { // PHYLIP header line out.println(" " + myNumTaxa); FormattedOutput format = FormattedOutput.getInstance(); for (int i = 0; i < myNumTaxa; i++) { format.displayLabel(out, myTaxaList.taxaName(i), 10); out.print(" "); for (int j = 0; j < myNumTaxa; j++) { // Chunks of 6 blocks each if (j % 6 == 0 && j != 0) { out.println(); out.print(" "); } out.print(" "); format.displayDecimal(out, getDistance(i, j), 5); } out.println(); } } /** * returns representation of this alignment as a string */ @Override public String toString() { StringWriter sw = new StringWriter(); try { printPHYLIP(new PrintWriter(sw)); } catch (Exception e) { e.printStackTrace(); } return sw.toString(); } /** * compute squared distance to second distance matrix */ public double squaredDistance(DistanceMatrix mat, boolean weighted) { double sum = 0; for (int i = 0; i < myNumTaxa - 1; i++) { for (int j = 0; j < i; j++) { double diff = myDistances[i][j] - mat.getDistance(i, j); double weight; if (weighted) { // Fitch-Margoliash weight // (variances proportional to distances) float distance = myDistances[i][j]; weight = 1.0 / distance * distance; } else { // Cavalli-Sforza-Edwards weight // (homogeneity of variances) weight = 1.0; } sum += weight * diff * diff; } } return 2.0 * sum; // we counted only half the matrix } /** * compute absolute distance to second distance matrix */ public double absoluteDistance(DistanceMatrix mat) { double sum = 0; for (int i = 0; i < myNumTaxa - 1; i++) { for (int j = 0; j < i; j++) { double diff = Math.abs(myDistances[i][j] - mat.getDistance(i, j)); sum += diff; } } return 2.0 * sum; // we counted only half the matrix } /** * Returns the number of taxa which is also the number of rows and columns * that the distance matrix has. */ public int getSize() { return myNumTaxa; } /** * Returns the distances as a 2-dimensional array of doubles. Matrix is * cloned first so it can be altered freely. */ public final double[][] getClonedDistances() { double[][] copy = new double[myNumTaxa][myNumTaxa]; for (int i = 0; i < myNumTaxa; i++) { for (int j = 0; j <= i; j++) { copy[i][j] = myDistances[i][j]; copy[j][i] = copy[i][j]; } } return copy; } /** * Returns the distances as a 2-dimensional array of doubles (in the actual * array used to store the distances) */ public final double[][] getDistances() { return getClonedDistances(); } public final float getDistance(final int row, final int col) { if (row > col) { return myDistances[row][col]; } else { return myDistances[col][row]; } } /** * Returns the mean pairwise distance of this matrix */ public double meanDistance() { double dist = 0.0; int count = 0; for (int i = 1; i < myNumTaxa; i++) { for (int j = 0; j < i; j++) { float distance = myDistances[i][j]; if (!Float.isNaN(distance)) { dist += distance; count++; } } } return dist / (double) count; } public Taxon getTaxon(int i) { return myTaxaList.get(i); } public int numberOfTaxa() { return myTaxaList.numberOfTaxa(); } public int whichIdNumber(String name) { return myTaxaList.indexOf(name); } public int whichIdNumber(Taxon id) { return myTaxaList.indexOf(id); } /** * Return TaxaList of this alignment. */ public TaxaList getTaxaList() { return myTaxaList; } /** * test whether this matrix is a symmetric distance matrix * */ public boolean isSymmetric() { for (int i = 0; i < myNumTaxa; i++) { if (myDistances[i][i] != 0) { return false; } } return true; } private boolean isIn(int value, int[] set) { if (set == null) { return false; } for (int i = 0; i < set.length; i++) { if (set[i] == value) { return true; } } return false; } /** * @param fromIndex the index of the thing (taxa,sequence) from which we * want to find the closest (excluding self) * @param exclusion indexes of things that should not be considered, may be * null * @return the index of the member closes to the specified */ public int getClosestIndex(int fromIndex, int[] exclusion) { float min = Float.POSITIVE_INFINITY; int index = -1; for (int i = 0; i < myNumTaxa; i++) { if (i != fromIndex && !isIn(i, exclusion)) { float d = getDistance(fromIndex, i); if (d < min) { min = d; index = i; } } } return index; } public static DistanceMatrix hadamardProduct(DistanceMatrix m0, DistanceMatrix m1) { int n = m0.numberOfTaxa(); if (m1.numberOfTaxa() != n) { throw new IllegalArgumentException("Matrices must be of the same dimensions to compute a Hadamard product."); } DistanceMatrixBuilder builder = DistanceMatrixBuilder.getInstance(m0.getTaxaList()); for (int r = 0; r < n; r++) { for (int c = 0; c <= r; c++) { builder.set(r, c, m0.myDistances[r][c] * m1.myDistances[r][c]); } } return builder.build(); } @Override public Object[] getTableColumnNames() { String[] colNames = new String[getSize() + 1]; colNames[0] = "Taxa"; for (int i = 0; i < myNumTaxa; i++) { colNames[i + 1] = getTaxon(i).toString(); } return colNames; } /** * Returns specified row. * * @param rowLong row number * * @return row */ @Override public Object[] getRow(long rowLong) { int row = (int) rowLong; Object[] result = new Object[myNumTaxa + 1]; result[0] = getTaxon(row); for (int j = 1; j <= myNumTaxa; j++) { result[j] = String.valueOf(getDistance(row, j - 1)); } return result; } @Override public String getTableTitle() { return "Distance Matrix"; } @Override public long getRowCount() { return myNumTaxa; } @Override public long getElementCount() { return getRowCount() * getColumnCount(); } @Override public int getColumnCount() { return myNumTaxa + 1; } @Override public Object getValueAt(long rowIndex, int columnIndex) { if (columnIndex == 0) { return getTaxon((int) rowIndex); } return getDistance((int) rowIndex, columnIndex - 1); } public String getColumnName(int col) { if (col == 0) { return "Taxa"; } return getTaxon(col - 1).toString(); } public GeneralAnnotation annotations() { return myAnnotations; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy