net.maizegenetics.analysis.gbs.pana.PanABuildPivotTBTPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate trait associations, evolutionary patterns, and linkage
disequilibrium.
package net.maizegenetics.analysis.gbs.pana;
import ch.systemsx.cisd.hdf5.HDF5Factory;
import ch.systemsx.cisd.hdf5.IHDF5Writer;
import ch.systemsx.cisd.hdf5.IHDF5WriterConfigurator;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.util.ArgsEngine;
import org.apache.log4j.Logger;
import javax.swing.*;
import java.awt.*;
import java.io.*;
import java.util.Arrays;
import net.maizegenetics.dna.tag.TagCounts;
import net.maizegenetics.dna.tag.TagsByTaxa.FilePacking;
import static net.maizegenetics.dna.tag.TagsByTaxaByteHDF5TagGroups.encodeBySign;
import net.maizegenetics.util.Tassel5HDF5Constants;
/**
 * Builds a single tags-by-taxa (TBT) HDF5 file from a master TagCount file and a directory of
 * TagCount files, one file per taxon.
 *
 * <p>Every tag of the master TagCount file is looked up in each per-taxon TagCount file; the
 * resulting per-taxon read counts are encoded with {@code encodeBySign} and written as one
 * dataset per tag under the {@code tbttg} group of the output file.
 *
 * <p>Command line options: {@code -m} master TagCount file, {@code -d} directory containing
 * tagCount files, {@code -o} output TBT.
 *
 * @author Fei Lu
 */
public class PanABuildPivotTBTPlugin extends AbstractPlugin {

    static long timePoint1;
    private ArgsEngine engine = null;
    private Logger logger = Logger.getLogger(PanABuildPivotTBTPlugin.class);
    String masterTagCountFileS = null;
    String tagCountDirS = null;
    String tbtFileS = null;
    IHDF5Writer h5 = null;

    /** Creates the plugin without a parent frame (non-interactive use). */
    public PanABuildPivotTBTPlugin() {
        super(null, false);
    }

    /**
     * Creates the plugin attached to a parent frame.
     *
     * @param parentFrame frame handed to the {@code AbstractPlugin} constructor
     */
    public PanABuildPivotTBTPlugin(Frame parentFrame) {
        super(parentFrame, false);
    }

    /** Logs the command line usage of this plugin. */
    private void printUsage() {
        logger.info(
                "\n\nUsage is as follows:\n"
                + " -m master TagCount file\n"
                + " -d directory containing tagCount files\n"
                + " -o output TBT\n");
    }

    /**
     * Loads the master TagCount file and every file in the tagCount directory whose name ends
     * with "cnt" (case-insensitive), then writes the TBT file.
     *
     * @param input not used
     * @return always {@code null}
     * @throws IllegalArgumentException if the tagCount directory cannot be listed
     */
    @Override
    public DataSet performFunction(DataSet input) {
        File[] tcFiles = new File(this.tagCountDirS).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith("cnt");
            }
        });
        // listFiles() returns null (not an empty array) when the path is not a directory or
        // cannot be read; fail with a message that names the path instead of a bare NPE.
        if (tcFiles == null) {
            throw new IllegalArgumentException(
                    "Cannot list tagCount files in directory: " + this.tagCountDirS);
        }
        // Sorting fixes the taxon order, which is also the column order of the TBT.
        Arrays.sort(tcFiles);

        String[] taxaNames = new String[tcFiles.length];
        TagCounts[] tcs = new TagCounts[tcFiles.length];
        for (int i = 0; i < tcFiles.length; i++) {
            String fileName = tcFiles[i].getName();
            // The taxon name is the file name without its last four characters (e.g. ".cnt").
            taxaNames[i] = fileName.substring(0, fileName.length() - 4);
        }
        TagCounts masterTc = new TagCounts(this.masterTagCountFileS, FilePacking.Byte);
        for (int i = 0; i < tcs.length; i++) {
            tcs[i] = new TagCounts(tcFiles[i].getAbsolutePath(), FilePacking.Byte);
        }
        this.naiveBuildTBT(masterTc, taxaNames, tcs);
        return null;
    }

    /**
     * Writes the TBT HDF5 file: root attributes, the tag matrix, the tag lengths, the taxa
     * names and one encoded read-count array per master tag.
     *
     * <p>The writer is closed when this method returns, whether normally or by exception.
     *
     * @param masterTC tags that define the rows of the TBT
     * @param taxaNames taxon names, parallel to {@code tcs}
     * @param tcs per-taxon tag counts, parallel to {@code taxaNames}
     */
    private void naiveBuildTBT(TagCounts masterTC, String[] taxaNames, TagCounts[] tcs) {
        int tagCount = masterTC.getTagCount();
        int tagSizeInLong = masterTC.getTagSizeInLong();

        // Transpose the tags into a [tagSizeInLong][tagCount] matrix, the layout written below.
        long[][] newTags = new long[tagSizeInLong][tagCount];
        for (int i = 0; i < tagCount; i++) {
            long[] ct = masterTC.getTag(i);
            for (int j = 0; j < tagSizeInLong; j++) {
                newTags[j][i] = ct[j];
            }
        }

        IHDF5WriterConfigurator config = HDF5Factory.configure(new File(this.tbtFileS));
        System.out.println("Creating HDF5 file: " + tbtFileS);
        config.overwrite();
        config.dontUseExtendableDataTypes();
        config.useUTF8CharacterEncoding();
        h5 = config.writer();
        try {
            h5.int32().setAttr("/", "tagCount", tagCount);
            h5.int32().setAttr("/", "chunkSize", Tassel5HDF5Constants.BLOCK_SIZE);
            h5.int32().setAttr("/", "tagLengthInLong", tagSizeInLong);
            h5.int32().setAttr("/", "taxaNum", taxaNames.length);

            //create tag matrix
            h5.int64().createMatrix("tags", tagSizeInLong, tagCount, tagSizeInLong, tagCount);
            h5.writeLongMatrix("tags", newTags);
            h5.int8().createArray("tagLength", tagCount);
            h5.writeByteArray("tagLength", masterTC.getTagLength());

            //create TBT matrix
            h5.object().createGroup("tbttg");
            // NOTE(review): the shift by 16 here and below assumes BLOCK_SIZE is 65536,
            // while the remainder test uses BLOCK_SIZE itself -- confirm they agree.
            int tagChunks = tagCount >> 16;
            if (tagCount % Tassel5HDF5Constants.BLOCK_SIZE > 0) {
                tagChunks++;
            }
            System.out.println(Tassel5HDF5Constants.BLOCK_SIZE);
            System.out.printf("tagChunks %d Div %g %n", tagChunks, (double) tagCount / (double) Tassel5HDF5Constants.BLOCK_SIZE);
            h5.int32().setAttr("tbttg/", "tagCount", tagCount);
            h5.int32().setAttr("tbttg/", "tagChunks", tagChunks);
            for (int tc = 0; tc < tagChunks; tc++) {
                h5.object().createGroup("tbttg/c" + tc);
            }
            h5.string().createArrayVL("tbttg/taxaNames", taxaNames.length);
            h5.string().writeArrayVL("tbttg/taxaNames", taxaNames);

            for (int i = 0; i < tagCount; i++) {
                long[] tag = masterTC.getTag(i);
                // Fresh array per tag: entries stay 0 for taxa that do not contain the tag.
                byte[] td = new byte[taxaNames.length];
                for (int j = 0; j < taxaNames.length; j++) {
                    int index = tcs[j].getTagIndex(tag);
                    if (index >= 0) {
                        // Saturate at Byte.MAX_VALUE: a plain narrowing cast would wrap read
                        // counts above 127 around to negative values.
                        td[j] = (byte) Math.min(tcs[j].getReadCount(index), Byte.MAX_VALUE);
                    }
                }
                int chunk = i >> 16;
                String d = "tbttg/c" + chunk + "/" + i;
                byte[] deftc = encodeBySign(td);
                h5.int8().createArray(d, deftc.length);
                h5.writeByteArray(d, deftc);
            }
        } finally {
            // Always release the HDF5 file handle, even when writing fails part way through.
            h5.close();
            h5 = null;
        }
    }

    /**
     * Parses the command line arguments. All of {@code -m}, {@code -d} and {@code -o} are
     * required. Arguments are parsed only on the first call for a given plugin instance.
     *
     * @param args command line arguments
     * @throws IllegalArgumentException if {@code args} is empty or a required option is missing
     */
    @Override
    public void setParameters(String[] args) {
        if (args.length == 0) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }
        if (engine == null) {
            engine = new ArgsEngine();
            engine.add("-m", "--master-TC", true);
            engine.add("-d", "--directory-TC", true);
            engine.add("-o", "--output-TBT", true);
            engine.parse(args);
        }
        this.masterTagCountFileS = requiredOption("-m");
        this.tagCountDirS = requiredOption("-d");
        this.tbtFileS = requiredOption("-o");
    }

    /**
     * Returns the value of a required option, or prints the usage and fails if it is absent.
     *
     * @param flag short option name, e.g. {@code "-m"}
     * @return the option value reported by the argument engine
     * @throws IllegalArgumentException if the option was not supplied
     */
    private String requiredOption(String flag) {
        if (!engine.getBoolean(flag)) {
            printUsage();
            throw new IllegalArgumentException("\n\nPlease use the above arguments/options.\n\n");
        }
        return engine.getString(flag);
    }

    /** Not supported by this plugin. */
    @Override
    public ImageIcon getIcon() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** Not supported by this plugin. */
    @Override
    public String getButtonName() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** Not supported by this plugin. */
    @Override
    public String getToolTipText() {
        throw new UnsupportedOperationException("Not supported yet.");
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy