All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.analysis.gbs.MergeMultipleTOPMPlugin Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate trait associations, evolutionary patterns, and linkage disequilibrium.

The newest version!
/*
 * MergeMultipleTOPMPlugin
 */
package net.maizegenetics.analysis.gbs;

import java.awt.Frame;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import javax.swing.ImageIcon;
import net.maizegenetics.dna.map.TOPMInterface;
import net.maizegenetics.dna.map.TOPMUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.util.ArgsEngine;
import net.maizegenetics.util.DirectoryCrawler;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Merges the variant information of several binary TOPM files into one
 * "original" TOPM and writes the result to a new file.
 *
 * <p>Workflow, as implemented below:
 * <ol>
 * <li>{@code setParameters} parses {@code -input} (directory of TOPM files),
 * {@code -orig} (the original TOPM) and {@code -result} (output file, must not
 * exist yet).</li>
 * <li>{@code performFunction} loads the original TOPM, then reads every listed
 * TOPM file except the original itself. For each tag (row) the per-tag fields
 * are compared with the original and mismatches are logged. If the variant
 * definitions/offsets of a row differ from the original, they are copied into
 * the original TOPM.</li>
 * <li>A row may be changed by at most one input file; a second change to the
 * same row aborts the merge with an {@link IllegalStateException}.</li>
 * </ol>
 *
 * @author terry
 */
public class MergeMultipleTOPMPlugin extends AbstractPlugin {

    private static final Logger myLogger = LogManager.getLogger(MergeMultipleTOPMPlugin.class);

    /** File name pattern used to pick TOPM files out of the input directory. */
    private static final String TOPM_FILENAME_REGEX = "(?i).*\\.topm$|.*\\.topm\\.bin";

    /** Buffer size, in bytes, used when reading a TOPM file. */
    private static final int INPUT_BUFFER_SIZE = 65536;

    /** A progress line is logged each time this many tags have been read. */
    private static final int PROGRESS_INTERVAL = 1000000;

    /** Changed-row counts are only tracked for chromosome numbers below this bound. */
    private static final int TRACKED_CHROMOSOMES = 20;

    private ArgsEngine myArgsEngine = null;
    private String[] myTOPMFileNames = null;
    private String myOutputFilename = null;
    private String myOrigFilename = null;
    private TOPMInterface myOrigTOPM = null;
    private int myOrigTagCount = 0;
    // Variant arrays obtained from the original TOPM before any file is merged.
    // NOTE(review): whether these are copies or live views of the TOPM's
    // internal arrays depends on the TOPMInterface implementation - confirm.
    private byte[][] myOrigVariantOff = null;
    private byte[][] myOrigVariantDef = null;
    // myChangedRows[row] is true once some input file has replaced that row's variants.
    private boolean[] myChangedRows = null;
    // Number of changed rows per chromosome number, indexed by chromosome.
    private final int[] myChromosomeChangedCounts = new int[TRACKED_CHROMOSOMES];

    public MergeMultipleTOPMPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    /**
     * Runs the merge: loads the original TOPM, folds in every other TOPM file
     * from the input directory, logs per-chromosome change counts and writes
     * the merged TOPM to the result file.
     *
     * @param input not used by this plugin
     * @return always {@code null}
     * @throws IllegalStateException if {@code setParameters} has not supplied
     * the input files, original file and result file, or if a TOPM file cannot
     * be processed
     */
    @Override
    public DataSet performFunction(DataSet input) {

        if ((myTOPMFileNames == null) || (myOrigFilename == null) || (myOutputFilename == null)) {
            throw new IllegalStateException("MergeMultipleTOPMPlugin: performFunction: setParameters must be called with -input, -orig and -result first.");
        }

        myOrigTOPM = TOPMUtils.readTOPM(myOrigFilename);
        myOrigTagCount = myOrigTOPM.getTagCount();
        myLogger.info("performFunction: Number of Original Tags: " + myOrigTagCount);
        myOrigVariantOff = myOrigTOPM.getVariantOff();
        myOrigVariantDef = myOrigTOPM.getVariantDef();
        // A freshly allocated boolean[] is already all false.
        myChangedRows = new boolean[myOrigTagCount];
        // Reset per-run statistics so a second invocation does not report stale counts.
        Arrays.fill(myChromosomeChangedCounts, 0);

        for (String filename : myTOPMFileNames) {
            if (!isOriginalFile(filename)) {
                processTOPM(filename);
            }
        }

        for (int x = 0; x < myChromosomeChangedCounts.length; x++) {
            if (myChromosomeChangedCounts[x] != 0) {
                myLogger.info("performFunction: chromosome: " + x + " changed: " + myChromosomeChangedCounts[x]);
            }
        }

        TOPMUtils.writeTOPM(myOrigTOPM, myOutputFilename);

        return null;
    }

    /**
     * Tells whether {@code filename} names the original TOPM file, so that the
     * original is not merged into itself.
     *
     * @param filename a file name from the input directory listing
     * @return {@code true} if the name equals the {@code -orig} value, either
     * literally or after both are made absolute and normalized
     */
    private boolean isOriginalFile(String filename) {
        if (filename.equals(myOrigFilename)) {
            return true;
        }
        try {
            // The directory listing and the -orig argument may spell the same
            // file differently (relative vs. absolute, "./" segments).
            return new File(filename).toPath().toAbsolutePath().normalize()
                    .equals(new File(myOrigFilename).toPath().toAbsolutePath().normalize());
        } catch (java.nio.file.InvalidPathException e) {
            // Not convertible to a path: fall back to the literal comparison above.
            return false;
        }
    }

    /** Logs the command line options understood by this plugin. */
    private void printUsage() {
        myLogger.info(
                "\nThe options for the MergeMultipleTOPMPlugin:\n"
                + "-input  Input directory containing TOPM files\n"
                + "-orig Original TOPM\n"
                + "-result  TOPM Output Filename\n");
    }

    /**
     * Parses and validates the command line options.
     *
     * @param args plugin arguments: {@code -input <dir>}, {@code -orig <file>},
     * {@code -result <file>}
     * @throws IllegalArgumentException if no arguments are given, the input is
     * missing or not a directory, the directory holds no TOPM files, the
     * original file is missing or does not exist, or the result file is
     * missing or already exists
     */
    @Override
    public void setParameters(String[] args) {
        if ((args == null) || (args.length == 0)) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }
        if (myArgsEngine == null) {
            myArgsEngine = new ArgsEngine();
            myArgsEngine.add("-input", "-input", true);
            myArgsEngine.add("-orig", "-orig", true);
            myArgsEngine.add("-result", "-result", true);
        }
        myArgsEngine.parse(args);

        String tempDirectory = myArgsEngine.getString("-input");
        if ((tempDirectory == null) || (tempDirectory.length() == 0)) {
            // Without an input directory there is nothing to merge, and
            // performFunction would have no file list to iterate over.
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: Must define input directory");
        }
        File topmDirectory = new File(tempDirectory);
        if (!topmDirectory.isDirectory()) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: The input name you supplied is not a directory: " + tempDirectory);
        }
        myTOPMFileNames = DirectoryCrawler.listFileNames(TOPM_FILENAME_REGEX, topmDirectory.getAbsolutePath());
        // The null test must come first; otherwise .length would throw before it is reached.
        if ((myTOPMFileNames == null) || (myTOPMFileNames.length == 0)) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: No TOPM files in: " + tempDirectory);
        }
        myLogger.info("setParameters: Using these TOPM files:");
        for (String filename : myTOPMFileNames) {
            myLogger.info(filename);
        }

        myOrigFilename = myArgsEngine.getString("-orig");
        if ((myOrigFilename == null) || (myOrigFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: Must define original file");
        }
        File origFile = new File(myOrigFilename);
        if (!origFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: The original file doesn't exist: " + myOrigFilename);
        }

        myOutputFilename = myArgsEngine.getString("-result");
        if ((myOutputFilename == null) || (myOutputFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: Must define result file");
        }
        File outputFile = new File(myOutputFilename);
        if (outputFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("MergeMultipleTOPMPlugin: setParameters: The output file already exists: " + myOutputFilename);
        }

    }

    /**
     * Reads one binary TOPM file and merges each of its tags into the original
     * TOPM via {@code processTag}.
     *
     * <p>The file starts with three ints (tag count, tag length in longs,
     * maximum variants per tag) followed by one record per tag.
     *
     * @param filename TOPM file to read
     * @throws IllegalStateException if the file cannot be read or merged; the
     * underlying exception is attached as the cause
     */
    private void processTOPM(String filename) {

        myLogger.info("processTOPM: " + filename);
        DataInputStream dis = null;
        int tagsInput = 0;
        try {
            dis = new DataInputStream(new BufferedInputStream(new FileInputStream(filename), INPUT_BUFFER_SIZE));
            int tagNum = dis.readInt();
            int tagLengthInLong = dis.readInt();
            int maxVariants = dis.readInt();
            if (tagNum > myOrigTagCount) {
                // Rows are matched to the original by index, so extra rows have
                // nothing to merge into; fail with a clear message instead of
                // an index error part-way through the file.
                throw new IllegalStateException("MergeMultipleTOPMPlugin: processTOPM: " + filename
                        + " has " + tagNum + " tags but the original has only " + myOrigTagCount);
            }
            for (int row = 0; row < tagNum; row++) {
                tagsInput++;
                processTag(dis, row, tagLengthInLong, maxVariants);
                if (row % PROGRESS_INTERVAL == 0) {
                    myLogger.info("processTOPM: Tags Read: " + row);
                }
            }
            myLogger.info("processTOPM: Number of Tags: " + tagsInput);
        } catch (Exception e) {
            // Log through the logger (with stack trace) and keep the cause so
            // the caller can see why processing failed.
            myLogger.error("processTOPM: Error Reading Tag: " + tagsInput, e);
            throw new IllegalStateException("MergeMultipleTOPMPlugin: processTOPM: Problem processing: " + filename, e);
        } finally {
            // dis is still null when the file could not be opened.
            if (dis != null) {
                try {
                    dis.close();
                } catch (IOException ignored) {
                    // Best effort: a failure to close an input stream does not
                    // affect the merge result.
                }
            }
        }

    }

    /**
     * Reads one tag record from the stream, logs any per-tag fields that differ
     * from the original TOPM, and copies the record's variants into the
     * original TOPM if they differ from the original variants.
     *
     * @param dis stream positioned at the start of a tag record
     * @param row index of the tag, used to address the same tag in the original
     * @param tagLengthInLong number of longs holding the tag sequence
     * @param maxVariants number of (offset, definition) variant pairs per tag
     * @throws IOException if the record cannot be read
     * @throws IllegalStateException if this row was already changed by a
     * previously merged file
     */
    private void processTag(DataInputStream dis, int row, int tagLengthInLong, int maxVariants) throws IOException {

        // The tag sequence is not needed for merging, but it must still be
        // consumed so the stream is positioned at the following fields.
        for (int j = 0; j < tagLengthInLong; j++) {
            dis.readLong();
        }
        byte tagLength = dis.readByte();
        byte multimaps = dis.readByte();
        int chromosome = dis.readInt();
        byte strand = dis.readByte();
        int startPosition = dis.readInt();
        int endPosition = dis.readInt();
        byte divergence = dis.readByte();
        byte[] variantPosOff = new byte[maxVariants];
        byte[] variantDef = new byte[maxVariants];
        for (int j = 0; j < maxVariants; j++) {
            variantPosOff[j] = dis.readByte();
            variantDef[j] = dis.readByte();
        }
        byte dcoP = dis.readByte();
        byte mapP = dis.readByte();

        // Mismatches in these fields are reported but do not stop the merge.
        checkField(row, "Tag Length", tagLength, myOrigTOPM.getTagLength(row));
        checkField(row, "Multi Maps", multimaps, myOrigTOPM.getMultiMaps(row));
        checkField(row, "Chromosome", chromosome, myOrigTOPM.getChromosome(row));
        checkField(row, "Strand", strand, myOrigTOPM.getStrand(row));
        checkField(row, "Start Position", startPosition, myOrigTOPM.getStartPosition(row));
        checkField(row, "End Position", endPosition, myOrigTOPM.getEndPosition(row));
        checkField(row, "Divergence", divergence, myOrigTOPM.getDivergence(row));
        checkField(row, "DcoP", dcoP, myOrigTOPM.getDcoP(row));
        checkField(row, "MapP", mapP, myOrigTOPM.getMapP(row));

        boolean variantsEqual = true;
        for (int i = 0; i < maxVariants; i++) {
            if ((myOrigVariantDef[row][i] != variantDef[i]) || (myOrigVariantOff[row][i] != variantPosOff[i])) {
                variantsEqual = false;
                break;
            }
        }

        if (variantsEqual) {
            return;
        }

        if (myChangedRows[row]) {
            throw new IllegalStateException("MergeMultipleTOPMPlugin: processTag: " + row + " has already been merged.");
        }
        myChangedRows[row] = true;
        // The chromosome number comes straight from the file; a negative value
        // (for example a missing-value marker) must not be used as an index.
        if ((chromosome >= 0) && (chromosome < myChromosomeChangedCounts.length)) {
            myChromosomeChangedCounts[chromosome]++;
        }

        for (int i = 0; i < maxVariants; i++) {
            myOrigTOPM.setVariantDef(row, i, variantDef[i]);
            myOrigTOPM.setVariantPosOff(row, i, variantPosOff[i]);
        }

    }

    /**
     * Logs an error when a per-tag field read from a file differs from the
     * value held by the original TOPM.
     *
     * @param row index of the tag
     * @param label field name used in the log message
     * @param fromFile value read from the TOPM file being merged
     * @param original value reported by the original TOPM
     */
    private void checkField(int row, String label, long fromFile, long original) {
        if (original != fromFile) {
            myLogger.error("processTag: " + row + " " + label + ": " + fromFile + " doesn't match Original: " + original);
        }
    }

    /**
     * @return always {@code null}; this plugin has no icon
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy