All downloads are free. The search and download functionality uses the official Maven repository.

net.maizegenetics.analysis.gbs.TOPMSummaryPlugin Maven / Gradle / Ivy

/*
 * TOPMSummaryPlugin
 */
package net.maizegenetics.analysis.gbs;

import java.awt.Frame;
import java.io.BufferedWriter;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.swing.ImageIcon;
import net.maizegenetics.dna.map.TagsOnPhysicalMap;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.util.ArgsEngine;
import net.maizegenetics.util.Utils;
import org.apache.log4j.Logger;

/**
 *
 * @author terry
 */
public class TOPMSummaryPlugin extends AbstractPlugin {

    private final Logger myLogger = Logger.getLogger(TOPMSummaryPlugin.class);
    private ArgsEngine myArgsEngine = null;
    private String myInputFilename = null;
    private TagsOnPhysicalMap myInputTOPM = null;
    private int myTagCount = 0;
    private int[] myChromosomes;
    private Map[] myTagsPerSite;
    private Map>[] myVariantDefsPerPosition;
    private int myNumUndefinedStrandedTags = 0;
    private Set myUndefinedStrandValues = new HashSet();
    private String myOutputFilename = null;
    private int[] myNumTagsPerVariantsDefined;
    private TreeSet[] myPositionsOnMaxVariantTags;

    public TOPMSummaryPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    @Override
    public DataSet performFunction(DataSet input) {

        myInputTOPM = new TagsOnPhysicalMap(myInputFilename, true);
        myTagCount = myInputTOPM.getTagCount();
        myLogger.info("performFunction: Number of Tags: " + myTagCount);

        myChromosomes = myInputTOPM.getChromosomes();
        Arrays.sort(myChromosomes);

        myNumTagsPerVariantsDefined = new int[myInputTOPM.getMaxNumVariants() + 1];

        myPositionsOnMaxVariantTags = new TreeSet[myChromosomes.length];
        for (int m = 0; m < myChromosomes.length; m++) {
            myPositionsOnMaxVariantTags[m] = new TreeSet();
        }

        myTagsPerSite = new TreeMap[myChromosomes.length];
        for (int m = 0; m < myChromosomes.length; m++) {
            myTagsPerSite[m] = new TreeMap();
        }
        myVariantDefsPerPosition = new TreeMap[myChromosomes.length];
        for (int m = 0; m < myChromosomes.length; m++) {
            myVariantDefsPerPosition[m] = new TreeMap>();
        }

        for (int i = 0; i < myTagCount; i++) {
            int startPos = myInputTOPM.getStartPosition(i);
            int endPos = myInputTOPM.getEndPosition(i);
            byte strand = myInputTOPM.getStrand(i);
            int chrom = myInputTOPM.getChromosome(i);
            int index = Arrays.binarySearch(myChromosomes, chrom);

            //String tag = BaseEncoder.getSequenceFromLong(myInputTOPM.getTag(i));
            if (strand == 1) {
                if (index < 0) {
                    myLogger.error("performFunction: tag: " + i + " chromosome: " + chrom + " not reported by getChromosomes()");
                    continue;
                }
                if (startPos > endPos) {
                    myLogger.error("performFunction: tag: " + i + " invalid state: strand: " + strand + "  start position: " + startPos + "  end position: " + endPos);
                    continue;
                }
                List positionsOnTag = new ArrayList();
                int numDefinedVariants = 0;
                for (int j = 0; j < myInputTOPM.getMaxNumVariants(); j++) {
                    int offset = myInputTOPM.getVariantPosOff(i, j);
                    byte def = myInputTOPM.getVariantDef(i, j);
                    if ((offset != Byte.MIN_VALUE) && (def != Byte.MIN_VALUE)) {
                        numDefinedVariants++;
                        int position = startPos + offset;
                        positionsOnTag.add(position);
                        Integer count = myTagsPerSite[index].get(position);
                        if (count == null) {
                            myTagsPerSite[index].put(position, 1);
                            Set temp = new HashSet();
                            temp.add(def);
                            myVariantDefsPerPosition[index].put(position, temp);
                        } else {
                            myTagsPerSite[index].put(position, count + 1);
                            Set temp = myVariantDefsPerPosition[index].get(position);
                            temp.add(def);
                        }
                    }
                }
                if (numDefinedVariants == myInputTOPM.getMaxNumVariants()) {
                    myPositionsOnMaxVariantTags[index].addAll(positionsOnTag);
                }
                myNumTagsPerVariantsDefined[numDefinedVariants]++;
            } else if (strand == -1) {
                if (index < 0) {
                    myLogger.error("performFunction: tag: " + i + " chromosome: " + chrom + " not reported by getChromosomes()");
                    continue;
                }
                if (startPos < endPos) {
                    myLogger.error("performFunction: tag: " + i + " invalid state: strand: " + strand + "  start position: " + startPos + "  end position: " + endPos);
                    continue;
                }
                List positionsOnTag = new ArrayList();
                int numDefinedVariants = 0;
                for (int j = 0; j < myInputTOPM.getMaxNumVariants(); j++) {
                    int offset = myInputTOPM.getVariantPosOff(i, j);
                    byte def = myInputTOPM.getVariantDef(i, j);
                    if ((offset != Byte.MIN_VALUE) && (def != Byte.MIN_VALUE)) {
                        numDefinedVariants++;
                        int position = startPos + offset;
                        positionsOnTag.add(position);
                        Integer count = myTagsPerSite[index].get(position);
                        if (count == null) {
                            myTagsPerSite[index].put(position, 1);
                            Set temp = new HashSet();
                            temp.add(def);
                            myVariantDefsPerPosition[index].put(position, temp);
                        } else {
                            myTagsPerSite[index].put(position, count + 1);
                            Set temp = myVariantDefsPerPosition[index].get(position);
                            temp.add(def);
                        }
                    }
                }
                if (numDefinedVariants == myInputTOPM.getMaxNumVariants()) {
                    myPositionsOnMaxVariantTags[index].addAll(positionsOnTag);
                }
                myNumTagsPerVariantsDefined[numDefinedVariants]++;
            } else {
                myNumUndefinedStrandedTags++;
                myUndefinedStrandValues.add(strand);
            }
        }

        for (int i = 0; i < myChromosomes.length; i++) {
            Iterator itr = myPositionsOnMaxVariantTags[i].iterator();
            StringBuilder builder = new StringBuilder();
            builder.append("performFunction: Chromosome: ");
            builder.append(myChromosomes[i]);
            builder.append(" Positions on Tags with Max Variants: ");
            boolean first = true;
            while (itr.hasNext()) {
                if (!first) {
                    builder.append(", ");
                } else {
                    first = false;
                }
                builder.append(itr.next());
            }
            myLogger.info(builder.toString());
        }

        myLogger.info("performFunction: Number of Tags with Undefined Strands: " + myNumUndefinedStrandedTags);
        Iterator itr = myUndefinedStrandValues.iterator();
        while (itr.hasNext()) {
            myLogger.info("performFunction: Undefined Strand Value: " + itr.next());
        }

        int totalSNPs = 0;
        for (int i = 0; i < myChromosomes.length; i++) {
            totalSNPs += myTagsPerSite[i].size();
            myLogger.info("performFunction: Chromosome: " + myChromosomes[i] + " Number of SNPs: " + myTagsPerSite[i].size());
        }
        myLogger.info("performFunction: Total SNPs: " + totalSNPs);

        for (int i = 0; i <= myInputTOPM.getMaxNumVariants(); i++) {
            myLogger.info("performFunction: Number of Tags: " + myNumTagsPerVariantsDefined[i] + " Has: " + i + " Variants Defined");
        }

        printSummary();
        return null;
    }

    private void printSummary() {
        BufferedWriter writer = null;

        try {
            writer = Utils.getBufferedWriter(myOutputFilename);
            writer.append("Chromosome\tPosition\tNum Tags\tVariant Defs\n");
            for (int c = 0; c < myChromosomes.length; c++) {
                Iterator itr = myTagsPerSite[c].entrySet().iterator();
                while (itr.hasNext()) {
                    Map.Entry entry = (Map.Entry) itr.next();
                    writer.append(myChromosomes[c] + "\t" + entry.getKey() + "\t" + entry.getValue() + "\t");
                    Set defSet = myVariantDefsPerPosition[c].get(entry.getKey());
                    Iterator itr2 = defSet.iterator();
                    boolean notFirst = false;
                    while (itr2.hasNext()) {
                        if (notFirst) {
                            writer.append(",");
                        } else {
                            notFirst = true;
                        }
                        writer.append(NucleotideAlignmentConstants.getHaplotypeNucleotide(((Byte) itr2.next()).byteValue()));
                    }
                    writer.append("\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                writer.close();
            } catch (Exception ex) {
                // do nothing
            }
        }
    }

    private void printUsage() {
        myLogger.info(
                "\nThe options for the TOPMSummaryPlugin:\n"
                + "-input Input TOPM\n"
                + "-output Output Filename\n");
    }

    @Override
    public void setParameters(String[] args) {

        if (args.length == 0) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }

        if (myArgsEngine == null) {
            myArgsEngine = new ArgsEngine();
            myArgsEngine.add("-input", "-input", true);
            myArgsEngine.add("-output", "-output", true);
        }
        myArgsEngine.parse(args);

        myInputFilename = myArgsEngine.getString("-input");
        if ((myInputFilename == null) || (myInputFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("TOPMSummaryPlugin: setParameters: Must define input file");
        }
        File inputFile = new File(myInputFilename);
        if (!inputFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("TOPMSummaryPlugin: setParameters: The input file doesn't exist: " + myInputFilename);
        }

        myOutputFilename = myArgsEngine.getString("-output");
        if ((myOutputFilename == null) || (myOutputFilename.length() == 0)) {
            printUsage();
            throw new IllegalArgumentException("TOPMSummaryPlugin: setParameters: Must define output file");
        }
        File outputFile = new File(myOutputFilename);
        if (outputFile.exists()) {
            printUsage();
            throw new IllegalArgumentException("TOPMSummaryPlugin: setParameters: The output file already exists: " + myOutputFilename);
        }

    }

    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy