All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.analysis.gbs.pana.PanABuildPivotTBTPlugin Maven / Gradle / Ivy

Go to download

TASSEL is a software package for evaluating trait associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
package net.maizegenetics.analysis.gbs.pana;

import ch.systemsx.cisd.hdf5.HDF5Factory;
import ch.systemsx.cisd.hdf5.IHDF5Writer;
import ch.systemsx.cisd.hdf5.IHDF5WriterConfigurator;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.util.ArgsEngine;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.awt.*;
import java.io.*;
import java.util.Arrays;
import net.maizegenetics.dna.tag.TagCounts;
import net.maizegenetics.dna.tag.TagsByTaxa.FilePacking;
import static net.maizegenetics.dna.tag.TagsByTaxaByteHDF5TagGroups.encodeBySign;
import net.maizegenetics.util.Tassel5HDF5Constants;

/**
 * Builds a pivot TagsByTaxa (TBT) HDF5 file from a master TagCount file and a directory of
 * TagCount files (one file per taxon, file names ending in "cnt").
 *
 * <p>For every tag in the master TagCount file, the read count of that tag in each taxon's
 * TagCount file is looked up, the per-taxon counts are encoded with {@code encodeBySign}, and the
 * result is written as one byte array per tag under the {@code tbttg} group of the output file.
 *
 * @author Fei Lu
 */
public class PanABuildPivotTBTPlugin extends AbstractPlugin {

    static long timePoint1;
    private ArgsEngine engine = null;
    private Logger logger = Logger.getLogger(PanABuildPivotTBTPlugin.class);

    /** Path of the master TagCount file (option {@code -m}). */
    String masterTagCountFileS = null;
    /** Directory holding the per-taxon TagCount files (option {@code -d}). */
    String tagCountDirS = null;
    /** Path of the HDF5 TBT file to create (option {@code -o}). */
    String tbtFileS = null;

    /** Writer for the output file; opened and closed inside {@link #naiveBuildTBT}. */
    IHDF5Writer h5 = null;

    public PanABuildPivotTBTPlugin() {
        super(null, false);
    }

    public PanABuildPivotTBTPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    /** Logs the command-line options understood by this plugin. */
    private void printUsage() {
        logger.info(
                "\n\nUsage is as follows:\n"
                + " -m  master TagCount file\n"
                + " -d  directory containing tagCount files\n"
                + " -o  output TBT\n");
    }

    /**
     * Loads the master TagCount file and every TagCount file found in the input directory, then
     * writes the pivot TBT.
     *
     * @param input not used
     * @return always {@code null}
     * @throws IllegalArgumentException if the TagCount directory cannot be listed
     */
    @Override
    public DataSet performFunction(DataSet input) {
        File[] tcFiles = new File(this.tagCountDirS).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith("cnt");
            }
        });
        // listFiles returns null (not an empty array) when the path is not a directory or an
        // I/O error occurs; fail with a clear message instead of an NPE in Arrays.sort.
        if (tcFiles == null) {
            throw new IllegalArgumentException(
                    "Cannot list TagCount files in directory: " + this.tagCountDirS);
        }
        // Sort so that the taxa order in the output is deterministic.
        Arrays.sort(tcFiles);
        String[] taxaNames = new String[tcFiles.length];
        for (int i = 0; i < taxaNames.length; i++) {
            // Taxon name = file name minus its last four characters
            // (presumably the ".cnt" extension -- the filter above only guarantees "cnt").
            String fileName = tcFiles[i].getName();
            taxaNames[i] = fileName.substring(0, fileName.length() - 4);
        }
        TagCounts masterTc = new TagCounts(this.masterTagCountFileS, FilePacking.Byte);
        TagCounts[] tcs = new TagCounts[taxaNames.length];
        for (int i = 0; i < tcs.length; i++) {
            tcs[i] = new TagCounts(tcFiles[i].getAbsolutePath(), FilePacking.Byte);
        }
        this.naiveBuildTBT(masterTc, taxaNames, tcs);
        return null;
    }

    /**
     * Writes the output HDF5 file: root attributes, the tag matrix, the tag lengths, the taxa
     * names, and one encoded per-taxon count array for every master tag.
     *
     * @param masterTC tags that define the rows of the TBT
     * @param taxaNames taxon names, parallel to {@code tcs}
     * @param tcs per-taxon tag counts, parallel to {@code taxaNames}
     */
    private void naiveBuildTBT (TagCounts masterTC, String[] taxaNames, TagCounts[] tcs) {
        int tagCount = masterTC.getTagCount();
        int tagSizeInLong = masterTC.getTagSizeInLong();

        // Transpose the tags into [longIndex][tagIndex] layout for the "tags" matrix.
        long[][] newTags = new long[tagSizeInLong][tagCount];
        for (int i = 0; i < tagCount; i++) {
            long[] ct = masterTC.getTag(i);
            for (int j = 0; j < tagSizeInLong; j++) {
                newTags[j][i] = ct[j];
            }
        }

        IHDF5WriterConfigurator config = HDF5Factory.configure(new File(this.tbtFileS));
        System.out.println("Creating HDF5 file: " + tbtFileS);
        config.overwrite();
        config.dontUseExtendableDataTypes();
        config.useUTF8CharacterEncoding();
        h5 = config.writer();
        try {
            h5.int32().setAttr("/", "tagCount", tagCount);
            h5.int32().setAttr("/", "chunkSize", Tassel5HDF5Constants.BLOCK_SIZE);
            h5.int32().setAttr("/", "tagLengthInLong", tagSizeInLong);
            h5.int32().setAttr("/", "taxaNum", taxaNames.length);
            //create tag matrix
            h5.int64().createMatrix("tags", tagSizeInLong, tagCount, tagSizeInLong, tagCount);
            h5.writeLongMatrix("tags", newTags);
            h5.int8().createArray("tagLength", tagCount);
            h5.writeByteArray("tagLength", masterTC.getTagLength());
            //create TBT matrix
            h5.object().createGroup("tbttg");
            // NOTE(review): the shift by 16 assumes BLOCK_SIZE == 1 << 16 -- confirm against
            // Tassel5HDF5Constants and the code that reads this file.
            int tagChunks = tagCount >> 16;
            if (tagCount % Tassel5HDF5Constants.BLOCK_SIZE > 0) {
                tagChunks++;
            }
            System.out.println(Tassel5HDF5Constants.BLOCK_SIZE);
            System.out.printf("tagChunks %d Div %g %n", tagChunks, (double) tagCount / (double) Tassel5HDF5Constants.BLOCK_SIZE);
            h5.int32().setAttr("tbttg/", "tagCount", tagCount);
            h5.int32().setAttr("tbttg/", "tagChunks", tagChunks);

            for (int tc = 0; tc < tagChunks; tc++) {
                h5.object().createGroup("tbttg/c" + tc);
            }
            h5.string().createArrayVL("tbttg/taxaNames", taxaNames.length);
            h5.string().writeArrayVL("tbttg/taxaNames", taxaNames);

            for (int i = 0; i < tagCount; i++) {
                byte[] td = new byte[taxaNames.length];
                long[] t = masterTC.getTag(i);
                for (int j = 0; j < taxaNames.length; j++) {
                    int index = tcs[j].getTagIndex(t);
                    if (index < 0) {
                        // Tag absent from this taxon.
                        td[j] = 0;
                    } else {
                        // Saturate at Byte.MAX_VALUE: a plain (byte) cast wraps counts above 127
                        // to negative values. NOTE(review): encodeBySign presumably gives the
                        // sign a special meaning, so a wrapped count would be misread -- confirm.
                        td[j] = (byte) Math.min(tcs[j].getReadCount(index), Byte.MAX_VALUE);
                    }
                }
                int chunk = i >> 16;
                String d = "tbttg/c" + chunk + "/" + i;
                byte[] deftc = encodeBySign(td);
                h5.int8().createArray(d, deftc.length);
                h5.writeByteArray(d, deftc);
            }
        } finally {
            // Always release the file handle so buffered data is flushed to disk,
            // even when writing fails part-way through.
            h5.close();
        }
    }

    /**
     * Parses the command-line options {@code -m}, {@code -d} and {@code -o}; all are required.
     *
     * @param args command-line arguments
     * @throws IllegalArgumentException if {@code args} is empty or a required option is missing
     */
    @Override
    public void setParameters(String[] args) {
        if (args.length == 0) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }

        // Arguments are parsed only the first time this method is called on an instance.
        if (engine == null) {
            engine = new ArgsEngine();
            engine.add("-m", "--master-TC", true);
            engine.add("-d", "--directory-TC", true);
            engine.add("-o", "--output-TBT", true);
            engine.parse(args);
        }

        this.masterTagCountFileS = requiredOption("-m");
        this.tagCountDirS = requiredOption("-d");
        this.tbtFileS = requiredOption("-o");
    }

    /** Returns the value of a mandatory option, or prints the usage and throws if it is absent. */
    private String requiredOption(String flag) {
        if (!engine.getBoolean(flag)) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }
        return engine.getString(flag);
    }

    /**
     * Not implemented for this plugin.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public ImageIcon getIcon() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Not implemented for this plugin.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Not implemented for this plugin.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy