All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.data.FindInversionsPlugin Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

The newest version!
/*
 *  FindInversionsPlugin
 * 
 *  Created on May 9, 2016
 */
package net.maizegenetics.analysis.data;

import com.google.common.collect.Range;
import java.awt.Frame;
import java.io.BufferedWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.TreeMap;
import javax.swing.ImageIcon;
import net.maizegenetics.analysis.distance.DistanceMatrixPlugin;
import net.maizegenetics.analysis.distance.MultiDimensionalScalingPlugin;
import net.maizegenetics.analysis.filter.FilterSiteBuilderPlugin;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.phenotype.CorePhenotype;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.TableReportBuilder;
import net.maizegenetics.util.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 *
 * @author Terry Casstevens
 */
public class FindInversionsPlugin extends AbstractPlugin {

    private static final Logger myLogger = LogManager.getLogger(FindInversionsPlugin.class);

    public static enum WINDOW_UNIT {
        Sites, Positions
    };

    private PluginParameter myWindowUnit = new PluginParameter.Builder<>("windowUnit", WINDOW_UNIT.Sites, WINDOW_UNIT.class)
            .description("Window Unit")
            .range(WINDOW_UNIT.values())
            .build();

    private PluginParameter myStepSize = new PluginParameter.Builder<>("stepSize", 100, Integer.class)
            .description("Step Size")
            .range(Range.atLeast(0))
            .build();

    private PluginParameter myWindowSize = new PluginParameter.Builder<>("windowSize", 500, Integer.class)
            .description("Window Size")
            .range(Range.atLeast(0))
            .build();

    private PluginParameter myNumPCAs = new PluginParameter.Builder<>("numPCAs", 1, Integer.class)
            .description("")
            .range(Range.atLeast(1))
            .guiName("Number of PCAs")
            .build();

    private PluginParameter myOutputFile = new PluginParameter.Builder<>("outputFile", null, String.class)
            .description("")
            .outFile()
            .build();

    public FindInversionsPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    @Override
    protected void preProcessParameters(DataSet input) {
        List data = input.getDataOfType(GenotypeTable.class);
        if (data.size() != 1) {
            throw new IllegalArgumentException("FindInversionsPlugin: preProcessParameters: must input 1 GenotypeTable.");
        }
    }

    @Override
    public DataSet processData(DataSet input) {

        Datum data = input.getDataOfType(GenotypeTable.class).get(0);
        GenotypeTable genotypeTable = (GenotypeTable) data.getData();

        DistanceMatrixPlugin distance = new DistanceMatrixPlugin(getParentFrame(), false);

        MultiDimensionalScalingPlugin mds = new MultiDimensionalScalingPlugin(getParentFrame(), false);
        mds.numberOfAxes(numPCAs());

        Chromosome[] chromosomes = genotypeTable.chromosomes();

        for (Chromosome c : chromosomes) {

            FilterSiteBuilderPlugin filterChr = new FilterSiteBuilderPlugin(null, false)
                    .startChr(c)
                    .endChr(c);
            DataSet genotypeDataSet = filterChr.performFunction(input);
            GenotypeTable genotypeChr = (GenotypeTable) genotypeDataSet.getData(0).getData();
            int numSites = genotypeChr.numberOfSites();

            for (int start = 0;; start += stepSize()) {

                int startSite = -1;
                int endSite = -1;
                if (windowUnit() == WINDOW_UNIT.Sites) {
                    if (start > numSites) {
                        break;
                    }
                    startSite = start;
                    endSite = Math.min(start + windowSize() - 1, numSites - 1);
                } else if (windowUnit() == WINDOW_UNIT.Positions) {
                    startSite = genotypeChr.siteOfPhysicalPosition(start, c);
                    endSite += start + windowSize();
                    if (startSite < 0) {
                        startSite = -(startSite + 1);
                        if (startSite >= numSites) {
                            startSite = numSites - 1;
                        }
                    }
                    if (startSite > numSites) {
                        break;
                    }
                    endSite = genotypeChr.siteOfPhysicalPosition(endSite, c);
                    if (endSite < 0) {
                        endSite = -endSite - 2;
                        if (endSite >= numSites) {
                            endSite = numSites - 1;
                        }
                    }
                    if (startSite > endSite) {
                        continue;
                    }
                }

                FilterSiteBuilderPlugin filter = new FilterSiteBuilderPlugin(null, false)
                        .startSite(startSite)
                        .endSite(endSite);
                DataSet filteredGenotype = filter.performFunction(genotypeDataSet);

                DataSet distanceMatrix = distance.performFunction(filteredGenotype);

                try {
                    myLogger.info("Starting MDS...");
                    DataSet mdsResults = mds.processData(distanceMatrix);
                    myLogger.info("Finsihed MDS...");
                    for (int i = 1; i <= numPCAs(); i++) {
                        scoreSinglePCA((CorePhenotype) mdsResults.getDataOfType(CorePhenotype.class).get(0).getData(),
                                new ChrPos(c, startSite, endSite, genotypeChr.chromosomalPosition(startSite), genotypeChr.chromosomalPosition(endSite), i), i);
                    }
                } catch (Exception e) {
                    myLogger.debug(e.getMessage(), e);
                    myLogger.warn("Problem calculating MDS for window chr: " + c.getName() + " start site: " + startSite + " end site: " + endSite);
                }

            }

        }

        String[] columnHeaders = new String[]{"Chromosome", "Start Position", "End Position", "PCA Num", "Rank"};
        TableReportBuilder builder = TableReportBuilder.getInstance("Candidates", columnHeaders);
        try (BufferedWriter writer = Utils.getBufferedWriter(Utils.addSuffixIfNeeded(outputFile(), ".txt"))) {
            writer.write("Chromosome\tStart Site\tEnd Site\tStart Postion\tEnd Position\tPCA Num\tPeak Count 1\tGap Count\tPeak Count 2\tGap Count\tNPeak Count 3\n");
            for (Map.Entry> current : myResults.entrySet()) {
                ChrPos chrPos = current.getKey();
                List peakNew = current.getValue();
                if ((peakNew.get(0) / peakNew.get(1) > 1.8f)
                        && (peakNew.get(2) / peakNew.get(1) > 1.8f)
                        && (peakNew.get(2) / peakNew.get(3) > 1.8f)
                        && (peakNew.get(4) / peakNew.get(3) > 1.8f)) {
                    Object[] row = new Object[5];
                    row[0] = chrPos.myChr;
                    row[1] = chrPos.myStartPos;
                    row[2] = chrPos.myEndPos;
                    row[3] = chrPos.myPCANum;
                    row[4] = (peakNew.get(0) - peakNew.get(1))
                            + (peakNew.get(2) - peakNew.get(1))
                            + (peakNew.get(2) - peakNew.get(3))
                            + (peakNew.get(4) - peakNew.get(3));
                    builder.addElements(row);
                }
                writer.write(chrPos.myChr.getName() + "\t" + chrPos.myStartSite + "\t" + chrPos.myEndSite + "\t" + chrPos.myStartPos + "\t" + chrPos.myEndPos + "\t" + chrPos.myPCANum);
                int numBigGaps = 0;
                int numSmallRegionPeaks = 0;
                int count = 0;
                for (Float peak : current.getValue()) {
                    int resultType = count % 3;
                    if ((resultType == 0) && (peak > 20.0f)) {
                        numBigGaps++;
                    }
                    if ((resultType == 2) && (peak < 20.0f)) {
                        numSmallRegionPeaks++;
                    }
                    count++;
                    writer.write("\t" + peak);
                }
                if ((numBigGaps >= 2) && (numSmallRegionPeaks >= 3)) {
                    Object[] row = new Object[3];
                    row[0] = chrPos.myChr;
                    row[1] = chrPos.myStartPos;
                    row[2] = chrPos.myEndPos;
                    builder.addElements(row);
                }
                writer.write("\n");
            }
        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            throw new IllegalStateException("Problem writing file: " + outputFile());
        } finally {
            myResults.clear();
        }

        return new DataSet(new Datum("Candidate Inversions", builder.build(), null), this);
    }

    private void scoreSinglePCA(CorePhenotype pcaResults, ChrPos chrPos, int whichPCA) {

        int rowCount = (int) pcaResults.getRowCount();
        int numBins = 98;
        float[] pcaValues = new float[rowCount];
        for (int i = 0; i < rowCount; i++) {
            pcaValues[i] = (float) pcaResults.getRow(i)[whichPCA];
        }

        Arrays.sort(pcaValues);
        float minValue = pcaValues[0];
        float maxValue = pcaValues[rowCount - 1];
        float range = maxValue - minValue;
        float increment = range / ((float) numBins - 1.0f);

        float step = minValue;
        int[] bins = new int[numBins];
        int count = 0;
        for (int i = 0; i < numBins - 1; i++) {
            step += increment;
            while ((count < rowCount) && (pcaValues[count] < step)) {
                bins[i]++;
                count++;
            }
        }
        bins[numBins - 1] = rowCount - count;

        List peaksGapsWidths = new ArrayList<>();

        float peak1 = 0.0f;
        for (int i = 7; i <= 41; i++) {
            peak1 += (float) bins[i];
        }
        peaksGapsWidths.add(peak1);

        float gap1 = 0.0f;
        for (int i = 30; i <= 43; i++) {
            gap1 += bins[i];
        }
        peaksGapsWidths.add(gap1);

        float peak2 = 0.0f;
        for (int i = 32; i <= 66; i++) {
            peak2 += (float) bins[i];
        }
        peaksGapsWidths.add(peak2);

        float gap2 = 0.0f;
        for (int i = 55; i <= 68; i++) {
            gap2 += bins[i];
        }
        peaksGapsWidths.add(gap2);

        float peak3 = 0.0f;
        for (int i = 57; i <= 91; i++) {
            peak3 += (float) bins[i];
        }
        peaksGapsWidths.add(peak3);

        myResults.put(chrPos, peaksGapsWidths);

    }

    private Map> myResults = new TreeMap<>();
    private Map> myResult = new TreeMap<>();

    private void scoreMDS(CorePhenotype pcaResults, ChrPos chrPos) {

        myEdges.clear();
        Cluster.clearCachedClusters();

        long rowCount = pcaResults.getRowCount();
        for (long i = 0; i < rowCount; i++) {
            Object[] current = pcaResults.getRow(i);
            Taxon taxon1 = (Taxon) current[0];
            float pca1x = (float) current[1];
            float pca1y = (float) current[2];
            Cluster first = Cluster.getInstance(taxon1, pca1x, pca1y);
            for (long j = i + 1; j < rowCount; j++) {
                Object[] next = pcaResults.getRow(j);
                Taxon taxon2 = (Taxon) next[0];
                float pca2x = (float) next[1];
                float pca2y = (float) next[2];
                Cluster second = Cluster.getInstance(taxon2, pca2x, pca2y);
                Edge edge = new Edge(first, second);
                myEdges.add(edge);
            }
        }

        reduce(3);

        for (Edge edge : myEdges) {
            System.out.println(edge.myCluster1 + "\t" + edge.myCluster2 + "\t" + edge.myDistance);
        }

        myResult.put(chrPos, new PriorityQueue<>(myEdges));

    }

    private void reduce(int numClusters) {
        int numEdges = numClusters * (numClusters + 1) / 2 - numClusters;
        reduceClusters(numEdges);
    }

    private void reduceClusters(int numEdges) {

        if (myEdges.size() <= numEdges) {
            return;
        }

        Edge current = myEdges.remove();
        Cluster cluster1 = current.myCluster1;
        Cluster cluster2 = current.myCluster2;
        Cluster newCluster = Cluster.getInstance(cluster1, cluster2);

        int weight1 = cluster1.numTaxa();
        int weight2 = cluster2.numTaxa();
        int totalWeight = weight1 + weight2;

        Map edges1 = new HashMap<>(cluster1.myEdges);
        Map edges2 = new HashMap<>(cluster2.myEdges);

        for (Map.Entry entry : edges1.entrySet()) {
            Edge firstEdge = entry.getValue();
            Cluster same = entry.getKey();
            if (same == cluster2) {
                removeEdge(firstEdge);
            } else {
                Edge secondEdge = edges2.get(same);
                float distance = ((firstEdge.myDistance * (float) weight1) + (secondEdge.myDistance * (float) weight2)) / (float) totalWeight;
                Edge newEdge = new Edge(newCluster, same);
                removeEdge(firstEdge);
                removeEdge(secondEdge);
                myEdges.add(newEdge);
            }
        }

        reduceClusters(numEdges);

    }

    private void removeEdge(Edge edge) {
        edge.myCluster1.myEdges.remove(edge.myCluster2);
        edge.myCluster2.myEdges.remove(edge.myCluster1);
        myEdges.remove(edge);
    }

    PriorityQueue myEdges = new PriorityQueue<>();

    private static class Cluster {

        private static Map myInstances = new HashMap<>();

        private final List myList = new ArrayList<>();
        private final Map myEdges = new HashMap<>();
        private final float myX;
        private final float myY;

        private Cluster(Taxon taxon, float x, float y) {
            myList.add(taxon);
            myX = x;
            myY = y;
        }

        private Cluster(List taxa, float x, float y) {
            myList.addAll(taxa);
            myX = x;
            myY = y;
        }

        public static Cluster getInstance(Taxon taxon, float x, float y) {
            Cluster result = myInstances.get(taxon);
            if (result == null) {
                result = new Cluster(taxon, x, y);
                myInstances.put(taxon, result);
            }
            return result;
        }

        public static Cluster getInstance(Cluster cluster1, Cluster cluster2) {
            List result = new ArrayList<>();
            result.addAll(cluster1.myList);
            result.addAll(cluster2.myList);
            float weight1 = (float) cluster1.numTaxa();
            float weight2 = (float) cluster2.numTaxa();
            float totalWeight = weight1 + weight2;
            return new Cluster(result, (cluster1.myX * weight1 + cluster2.myX * weight2) / totalWeight, (cluster1.myY * weight1 + cluster2.myY * weight2) / totalWeight);
        }

        public int numTaxa() {
            return myList.size();
        }

        public void addEdge(Edge edge) {
            if (edge.myCluster1 != this) {
                myEdges.put(edge.myCluster1, edge);
            } else {
                myEdges.put(edge.myCluster2, edge);
            }
        }

        public static void clearCachedClusters() {
            myInstances.clear();
        }

        @Override
        public String toString() {
            return "Cluster{" + "myList=" + myList + '}';
        }

    }

    private static class Edge implements Comparable {

        private final Cluster myCluster1;
        private final Cluster myCluster2;
        private final float myDistance;

        public Edge(Cluster cluster1, Cluster cluster2) {
            myCluster1 = cluster1;
            myCluster2 = cluster2;
            myDistance = calculateDistance(cluster1.myX, cluster1.myY, cluster2.myX, cluster2.myY);
            myCluster1.addEdge(this);
            myCluster2.addEdge(this);
        }

        @Override
        public int compareTo(Edge o) {
            if (myDistance < o.myDistance) {
                return -1;
            } else if (myDistance > o.myDistance) {
                return 1;
            } else {
                return 0;
            }
        }

    }

    /**
     * Calculates distance between two points.
     *
     * @param x1
     * @param y1
     * @param x2
     * @param y2
     * @return
     */
    private static float calculateDistance(float x1, float y1, float x2, float y2) {
        float xSqr = (x1 - x2) * (x1 - x2);
        float ySqr = (y1 - y2) * (y1 - y2);
        return (float) Math.sqrt(xSqr + ySqr);
    }

    private class ChrPos implements Comparable {

        private final Chromosome myChr;
        private final int myStartSite;
        private final int myEndSite;
        private final int myStartPos;
        private final int myEndPos;
        private final int myPCANum;

        public ChrPos(Chromosome chr, int startSite, int endSite, int startPos, int endPos, int pcaNum) {
            myChr = chr;
            myStartSite = startSite;
            myEndSite = endSite;
            myStartPos = startPos;
            myEndPos = endPos;
            myPCANum = pcaNum;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof ChrPos)) {
                return false;
            }
            ChrPos other = (ChrPos) obj;
            return compareTo(other) == 0;
        }

        @Override
        public int hashCode() {
            int hash = 7;
            hash = 97 * hash + Objects.hashCode(this.myChr);
            hash = 97 * hash + this.myStartPos;
            hash = 97 * hash + this.myPCANum;
            return hash;
        }

        @Override
        public int compareTo(ChrPos o) {
            if (myChr.getChromosomeNumber() < o.myChr.getChromosomeNumber()) {
                return -1;
            } else if (myChr.getChromosomeNumber() > o.myChr.getChromosomeNumber()) {
                return 1;
            }

            if (myStartPos < o.myStartPos) {
                return -1;
            } else if (myStartPos > o.myStartPos) {
                return 1;
            }

            if (myPCANum < o.myPCANum) {
                return -1;
            } else if (myPCANum > o.myPCANum) {
                return 1;
            } else {
                return 0;
            }
        }

    }

    /**
     * Window Unit
     *
     * @return Window Unit
     */
    public WINDOW_UNIT windowUnit() {
        return myWindowUnit.value();
    }

    /**
     * Set Window Unit. Window Unit
     *
     * @param value Window Unit
     *
     * @return this plugin
     */
    public FindInversionsPlugin windowUnit(WINDOW_UNIT value) {
        myWindowUnit = new PluginParameter<>(myWindowUnit, value);
        return this;
    }

    /**
     * Step Size
     *
     * @return Step Size
     */
    public Integer stepSize() {
        return myStepSize.value();
    }

    /**
     * Set Step Size. Step Size
     *
     * @param value Step Size
     *
     * @return this plugin
     */
    public FindInversionsPlugin stepSize(Integer value) {
        myStepSize = new PluginParameter<>(myStepSize, value);
        return this;
    }

    /**
     * Window Size
     *
     * @return Window Size
     */
    public Integer windowSize() {
        return myWindowSize.value();
    }

    /**
     * Set Window Size. Window Size
     *
     * @param value Window Size
     *
     * @return this plugin
     */
    public FindInversionsPlugin windowSize(Integer value) {
        myWindowSize = new PluginParameter<>(myWindowSize, value);
        return this;
    }

    /**
     * Num PCAs
     *
     * @return Num PCAs
     */
    public Integer numPCAs() {
        return myNumPCAs.value();
    }

    /**
     * Set Num PCAs. Num PCAs
     *
     * @param value Num PCAs
     *
     * @return this plugin
     */
    public FindInversionsPlugin numPCAs(Integer value) {
        myNumPCAs = new PluginParameter<>(myNumPCAs, value);
        return this;
    }

    /**
     * Output File
     *
     * @return Output File
     */
    public String outputFile() {
        return myOutputFile.value();
    }

    /**
     * Set Output File. Output File
     *
     * @param value Output File
     *
     * @return this plugin
     */
    public FindInversionsPlugin outputFile(String value) {
        myOutputFile = new PluginParameter<>(myOutputFile, value);
        return this;
    }

    @Override
    public String getToolTipText() {
        return "Find Inversions";
    }

    @Override
    public ImageIcon getIcon() {
        URL imageURL = FindInversionsPlugin.class.getResource("/net/maizegenetics/analysis/images/inversion.gif");
        if (imageURL == null) {
            return null;
        } else {
            return new ImageIcon(imageURL);
        }
    }

    @Override
    public String getButtonName() {
        return "Find Inversions";
    }

    @Override
    public String getCitation() {
        return "Mei W, Casstevens T. (May 2016) Third TASSEL Hackathon.";
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy