All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.taxa.distance.ReadDistanceMatrix Maven / Gradle / Ivy

//
// ReadDistanceMatrix.java
//
package net.maizegenetics.taxa.distance;

import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.GeneralAnnotationStorage;
import net.maizegenetics.util.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.regex.Pattern;

/**
 * @author Terry Casstevens
 */
public class ReadDistanceMatrix {

    private static final Logger myLogger = LogManager.getLogger(ReadDistanceMatrix.class);

    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    private ReadDistanceMatrix() {
    }

    public static DistanceMatrix readDistanceMatrix(String filename) {

        String[] grmFilenames = DistanceMatrixUtils.getGRMFilenames(filename);
        String grmNBinFilename = grmFilenames[2];

        try (BufferedReader reader = Utils.getBufferedReader(filename)) {

            GeneralAnnotationStorage.Builder annotations = GeneralAnnotationStorage.getBuilder();
            boolean notFinished = true;
            String line = null;
            while (notFinished) {
                line = reader.readLine();
                if (line == null) {
                    throw new IllegalArgumentException("ReadDistanceMatrix: There is no matrix data in this file: " + filename);
                }
                line = line.trim();
                if (line.isEmpty()) {
                    // do nothing
                } else if (line.startsWith("##")) {
                    String[] keyValue = line.substring(2).split("=", 2);
                    annotations.addAnnotation(keyValue[0].trim(), keyValue[1].trim());
                } else {
                    notFinished = false;
                }
            }

            int numTaxa = 0;
            try {
                numTaxa = Integer.parseInt(line);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("ReadDistanceMatrix: The number of taxa is not a number: " + line);
            }

            DistanceMatrixBuilder builder = DistanceMatrixBuilder.getInstance(numTaxa);
            builder.annotation(annotations.build());

            String current = reader.readLine();
            int index = 0;
            while (current != null) {

                if (index >= numTaxa) {
                    throw new IllegalArgumentException("ReadDistancMatrix: There are too many lines in this file.  Expected: " + (numTaxa + 1) + " counting the first line (number of taxa)");
                }

                String[] tokens = WHITESPACE_PATTERN.split(current);
                if (tokens.length != numTaxa + 1) {
                    throw new IllegalStateException("ReadDistanceMatrix: Incorrect number of values on line number: " + (index + 2) + " expected: " + (numTaxa + 1) + " counting taxon name. actual: " + tokens.length);
                }
                builder.addTaxon(new Taxon(tokens[0]));

                for (int i = 0; i < numTaxa; i++) {
                    try {
                        builder.set(index, i, Double.parseDouble(tokens[i + 1]));
                    } catch (NumberFormatException nfex) {
                        myLogger.debug(nfex.getMessage(), nfex);
                        throw new IllegalArgumentException("ReadDistanceMatrix: Incorrectly formatted number: " + tokens[i + 1] + " on line number: " + (index + 2));
                    }
                }

                current = reader.readLine();
                index++;
            }

            if (index != numTaxa) {
                throw new IllegalArgumentException("ReadDistanceMatrix: There are too few lines in this file.  Expected: " + (numTaxa + 1) + " counting the first line (number of taxa)");
            }

            if (new File(grmNBinFilename).exists()) {
                readBinMultiBlupCounts(grmNBinFilename, builder);
            }

            return builder.build();

        } catch (IOException ioex) {
            myLogger.debug(ioex.getMessage(), ioex);
            throw new IllegalStateException("ReadDistanceMatrix: Problem reading file: " + filename);
        }

    }

    public static DistanceMatrix readBinMultiBlupMatrix(String filename) {

        String[] grmFilenames = DistanceMatrixUtils.getGRMFilenames(filename);
        String grmIdFilename = grmFilenames[0];
        String grmBinFilename = grmFilenames[1];
        String grmNBinFilename = grmFilenames[2];

        TaxaList taxa = readBinMultiBlupID(grmIdFilename);
        int numTaxa = taxa.numberOfTaxa();

        DistanceMatrixBuilder builder = DistanceMatrixBuilder.getInstance(taxa);
        int taxaOne = 0;
        int taxaTwo = 0;
        try (InputStream input = Utils.getInputStream(grmBinFilename)) {
            byte[] current = new byte[4];
            int numRead = input.read(current);
            while (numRead == 4) {
                float f = ByteBuffer.wrap(current).order(ByteOrder.LITTLE_ENDIAN).getFloat();
                builder.set(taxaTwo, taxaOne++, f);
                if (taxaOne > taxaTwo) {
                    taxaOne = 0;
                    taxaTwo++;
                }
                numRead = input.read(current);
            }

        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            throw new IllegalStateException("ReadDistanceMatrix: readBinMultiBlupMatrix: Problem reading: " + grmBinFilename);
        }

        readBinMultiBlupCounts(grmNBinFilename, builder);

        return builder.build();

    }

    public static TaxaList readBinMultiBlupID(String filename) {

        TaxaListBuilder builder = new TaxaListBuilder();

        try (BufferedReader reader = Utils.getBufferedReader(filename)) {
            String current = reader.readLine();
            while (current != null) {
                String[] tokens = current.split("\t");
                if (tokens.length != 2) {
                    throw new IllegalStateException("ReadDistanceMatrix: readBinMultiBlupID: Rows must have two columns: " + filename);
                }
                builder.add(new Taxon(tokens[1]));
                current = reader.readLine();
            }
        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            throw new IllegalStateException("ReadDistanceMatrix: readBinMultiBlupID: Problem reading: " + filename);
        }

        return builder.build();

    }

    public static void readBinMultiBlupCounts(String filename, DistanceMatrixBuilder builder) {

        try (InputStream input = Utils.getInputStream(filename)) {
            int taxaOne = 0;
            int taxaTwo = 0;
            byte[] current = new byte[4];
            int numRead = input.read(current);
            while (numRead == 4) {
                float f = ByteBuffer.wrap(current).order(ByteOrder.LITTLE_ENDIAN).getFloat();
                builder.setCount(taxaTwo, taxaOne++, Math.round(f));
                if (taxaOne > taxaTwo) {
                    taxaOne = 0;
                    taxaTwo++;
                }
                numRead = input.read(current);
            }
        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            throw new IllegalStateException("ReadDistanceMatrix: readBinMultiBlupCounts: Problem reading: " + filename);
        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy