net.maizegenetics.analysis.data.MergeRenameDeleteTaxaPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package for evaluating trait associations, evolutionary patterns, and linkage
disequilibrium.
/*
* MergeRenameDeleteTaxaPlugin
*/
package net.maizegenetics.analysis.data;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.dna.snp.genotypecall.BasicGenotypeMergeRule;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.GeneralAnnotation;
import net.maizegenetics.util.Utils;
import org.apache.log4j.Logger;
import javax.swing.*;
import java.awt.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.List;
/**
* @author jcg233
*/
public class MergeRenameDeleteTaxaPlugin extends net.maizegenetics.plugindef.AbstractPlugin {
/**
 * Returns the human-readable summary of this plugin: taxa are renamed using the rename key
 * file, taxa sharing a new name are merged, and taxa whose new name is "delete" are removed.
 *
 * @return the description text
 */
@Override
public String pluginDescription() {
    final String renameAndMerge = "Rename taxa, merging those with the same new name in the taxa rename key file. ";
    final String deletion = "Taxa with the new name \"delete\" will be removed.";
    return renameAndMerge + deletion;
}
// Plugin parameters. Each field carries an explicit type argument so that the typed accessors
// of this class (e.g. "String taxaRenameKey()") can return value() without a cast.

/** Required input file: tab-delimited key with the original and the new taxa names. */
private PluginParameter<String> taxaRenameKey = new PluginParameter.Builder<>("renameKey", null, String.class)
        .required(true)
        .inFile()
        .guiName("Taxa Rename Key")
        .description("Tab-delimited file with original and new taxa names. Taxa with the same new name will be merged. "
                + "Taxa with the new name \"delete\" will be removed. Any other columns (and the header line) are ignored.")
        .build();

/** Required output file: path of the HDF5 genotypes file to create. */
private PluginParameter<String> outputHDF5Genotypes = new PluginParameter.Builder<>("o", null, String.class)
        .guiName("Output HDF Genotypes")
        .required(true)
        .outFile()
        .description("Output HDF5 genotypes file").build();

/** Average per-base sequencing error rate handed to the genotype merge rule (default 0.01). */
private PluginParameter<Double> avgSeqErrorRate = new PluginParameter.Builder<>("eR", 0.01, Double.class)
        .guiName("Avg Seq Error Rate")
        .description("Average sequencing error rate per base (used to decide between heterozygous and homozygous calls when merging taxa)")
        .build();

/** When true, depths are not written to the output file (default false). */
private PluginParameter<Boolean> noDepthOutput = new PluginParameter.Builder<>("ndo", false, Boolean.class)
        .guiName("No Depth Output")
        .description("No depth output: do not write depths to the output HDF5 genotypes file")
        .build();

/** Optional short data set name; "__DATE__" in it is expanded by parseDataSetName(). */
private PluginParameter<String> dataSetName = new PluginParameter.Builder<>("name", null, String.class)
        .guiName("Data set name")
        .required(false)
        .description("(Optional) Short data set name to be added as an root level annotation under \"dataSetName\"")
        .build();

/**
 * Optional short data set description; "__SNPS__", "__TAXA__" and "__DATE__" in it are
 * expanded by parseDataSetDescription().
 */
private PluginParameter<String> dataSetDescription = new PluginParameter.Builder<>("desc", null, String.class)
        .guiName("Data set description")
        .required(false)
        .description("(Optional) Short data set description to be added as an root level annotation under \"dataSetDescription\"")
        .build();
/**
 * Creates the plugin with no parent frame and with the interactive flag switched off.
 */
public MergeRenameDeleteTaxaPlugin() {
    this(null, false);
}
/**
 * Creates the plugin, passing both arguments straight through to the superclass constructor.
 *
 * @param parentFrame parent frame handed to the superclass (may be null, as the no-arg constructor does)
 * @param isInteractive interactive flag handed to the superclass
 */
public MergeRenameDeleteTaxaPlugin(Frame parentFrame, boolean isInteractive) {
super(parentFrame, isInteractive);
}
private static final Logger myLogger = Logger.getLogger(MergeRenameDeleteTaxaPlugin.class);
// date is set in preProcessParameters() (format yyyyMMdd) and substituted for "__DATE__" placeholders.
String dataSetDescrip, date;
// Holds the message built by setUpHDF5GenotypeTableBuilder() when the output file already exists.
String errorMessage;
// Input genotype table and its name, captured in preProcessParameters().
private GenotypeTable inputGenotypes = null;
private String inputGenosName = null;
// New taxon name -> sorted set of the original taxa names mapped to it; filled by readTaxaRenameKey().
private TreeMap<String, TreeSet<String>> newNameToOldNames = new TreeMap<>();
private BasicGenotypeMergeRule genoMergeRule = null;
private GenotypeTableBuilder genos = null; //output genotype table
// Taxa of the input genotype table, captured in preProcessParameters().
private TaxaList taxaList = null;
/**
 * Validates the input selection and captures the state the rest of the plugin relies on:
 * the single input genotype table, its name and taxa list, today's date (yyyyMMdd), and the
 * output file name with any "__DATE__" placeholder expanded to "_" + date.
 *
 * @param input data set that must contain exactly one {@link GenotypeTable}
 * @throws IllegalArgumentException if {@code input} is null or does not hold exactly one genotype table
 */
@Override
protected void preProcessParameters(DataSet input) {
    final String selectOneMessage = "MergeRenameDeleteTaxaPlugin: preProcessParameters: Please select one Genotype Table.";
    if (input == null) {
        throw new IllegalArgumentException(selectOneMessage);
    }
    List<Datum> genotypeTables = input.getDataOfType(GenotypeTable.class);
    // Check the count BEFORE touching element 0, so an empty selection produces the intended
    // message instead of an IndexOutOfBoundsException.
    if (genotypeTables.size() != 1) {
        throw new IllegalArgumentException(selectOneMessage);
    }
    Datum genotypeDatum = genotypeTables.get(0);
    inputGenosName = genotypeDatum.getName();
    myLogger.info("\n" + pluginDescription() + "\n");
    myLogger.info("Input genotype name: " + inputGenosName);
    inputGenotypes = (GenotypeTable) genotypeDatum.getData();
    taxaList = inputGenotypes.taxa();
    date = new SimpleDateFormat("yyyyMMdd").format(new Date());
    String outfile = outputHDF5Genotypes();
    // NOTE(review): the parameter default is null; guard in case this hook runs before the
    // output file has been supplied - confirm the call order in AbstractPlugin.
    if (outfile != null) {
        outputHDF5Genotypes(outfile.replace("__DATE__", "_" + date));
    }
}
/**
 * Runs the plugin's pipeline in order: read the rename key, set up the output builder, add
 * the renamed/merged taxa, attach the optional data set name and description, and finally
 * call {@code build()} on the output builder.
 *
 * @param input not referenced here; the input genotype table is the one captured by
 *              preProcessParameters()
 * @return always null
 */
public DataSet processData(DataSet input) {
    readTaxaRenameKey();
    setUpHDF5GenotypeTableBuilder();
    int nTaxaAdded = addRenamedMergedTaxa();
    String name = dataSetName();
    if (name != null) {
        genos.dataSetName(parseDataSetName(name));
    }
    String description = dataSetDescription();
    if (description != null) {
        // The description may contain a "__TAXA__" placeholder, hence the taxa count.
        genos.dataSetDescription(parseDataSetDescription(description, nTaxaAdded));
    }
    genos.build();
    myLogger.info("\n\nFinished creating new HDF5 genotype file with merged and renamed taxa.\n\n");
    return null;
}
/**
 * Loads the taxa rename key file into {@code newNameToOldNames}.
 *
 * The first line is treated as a header and skipped. Every other non-blank line is split on
 * tabs; column 0 is the original taxon name and column 1 the new name. Extra columns are
 * ignored. Old names that share a new name end up in the same set.
 *
 * @throws IllegalStateException if the file cannot be opened or read, or if a data line has
 *                               fewer than two tab-delimited columns
 */
private void readTaxaRenameKey() {
    myLogger.info("\nReading the taxaRenameKey file:\n " + taxaRenameKey() + "\n");
    // Start from an empty map so that running the same plugin instance twice does not
    // carry over entries from a previous key file.
    newNameToOldNames.clear();
    int nLinesRead = 0;
    int nTaxaRead = 0;
    // try-with-resources closes the reader on every exit path (the reader used to leak).
    try (BufferedReader taxaRenameKeyReader = Utils.getBufferedReader(taxaRenameKey())) {
        if (taxaRenameKeyReader == null) {
            // NOTE(review): guards against a helper that reports failure by returning null - confirm.
            throw new IllegalStateException("\n\nProblem opening the taxaRenameKey file (" + taxaRenameKey() + ")\n\n");
        }
        String line;
        while ((line = taxaRenameKeyReader.readLine()) != null) {
            nLinesRead++;
            if (nLinesRead == 1) {
                continue; // skip the header
            }
            if (line.trim().isEmpty()) {
                continue; // tolerate blank lines (e.g. trailing ones) instead of failing on values[1]
            }
            String[] values = line.split("\t", -1);
            if (values.length < 2) {
                String message = "\n\nERROR: line " + nLinesRead + " of the taxaRenameKey file (" + taxaRenameKey()
                        + ") has fewer than two tab-delimited columns:\n\t" + line + "\n\n";
                myLogger.error(message);
                throw new IllegalStateException(message);
            }
            String oldName = values[0];
            String newName = values[1];
            if (!newNameToOldNames.containsKey(newName)) {
                newNameToOldNames.put(newName, new TreeSet<String>());
            }
            newNameToOldNames.get(newName).add(oldName);
            nTaxaRead++;
        }
    } catch (IOException e) {
        // Report through an exception (as setUpHDF5GenotypeTableBuilder() does) rather than
        // System.exit(), which would terminate the whole JVM including any host application.
        String message = "\n\nProblem reading the taxaRenameKey file (" + taxaRenameKey() + "):\n\t" + e;
        myLogger.error(message);
        throw new IllegalStateException(message, e);
    }
    myLogger.info("\nFinished reading the taxaRenameKey file (nTaxa=" + nTaxaRead + ")\n");
}
/**
 * Creates the genotype merge rule from the configured error rate and initializes the
 * incremental, merging output builder for the output HDF5 file using the input positions.
 *
 * Refuses to overwrite: when the output path already exists, the error is stored in
 * {@code errorMessage}, logged, and thrown.
 *
 * @throws IllegalStateException if the output HDF5 genotypes file already exists
 */
private void setUpHDF5GenotypeTableBuilder() {
    genoMergeRule = new BasicGenotypeMergeRule(avgSeqErrorRate());
    final boolean outputAlreadyPresent = new File(outputHDF5Genotypes()).exists();
    if (outputAlreadyPresent) {
        errorMessage = "\nERROR: the output HDF5 genotypes file:\n " + outputHDF5Genotypes() + "\n already exists\n\n";
        myLogger.error(errorMessage);
        throw new IllegalStateException(errorMessage);
    }
    myLogger.info("\nInitializing the output HDF5 file:\n " + outputHDF5Genotypes() + "\n\n");
    genos = GenotypeTableBuilder.getTaxaIncrementalWithMerging(
            outputHDF5Genotypes(),
            inputGenotypes.positions(),
            genoMergeRule);
}
private int addRenamedMergedTaxa() {
int nTaxaAdded = 0;
int nTaxaDeleted = 0;
for (Map.Entry> newNameAndOldNames : newNameToOldNames.entrySet()) {
String newName = newNameAndOldNames.getKey();
int nTaxaToMergeOrDelete = newNameAndOldNames.getValue().size();
if (newName.equals("delete") || newName.equals("remove")) {
nTaxaDeleted += nTaxaToMergeOrDelete;
continue;
}
StringBuilder oldNames = new StringBuilder();
Taxon.Builder TaxonBuilder = null;
int[][] alleleDepths = new int[NucleotideAlignmentConstants.NUMBER_NUCLEOTIDE_ALLELES][inputGenotypes.positions().numberOfSites()];
byte[] taxonGenos = null;
for (String oldName : newNameAndOldNames.getValue()) {
oldNames.append(oldName + ",");
Taxon oldTaxon = taxaList.get(taxaList.indexOf(oldName));
if (TaxonBuilder == null) {
TaxonBuilder = new Taxon.Builder(oldTaxon); // adds all the annotations from the first oldName taxon
TaxonBuilder = TaxonBuilder.name(newName);
} else {
GeneralAnnotation oldAnnos = oldTaxon.getAnnotation();
for (Map.Entry oldAnno : oldAnnos.getAllAnnotationEntries()) {
TaxonBuilder = TaxonBuilder.addAnno(oldAnno.getKey(), oldAnno.getValue());
}
}
TaxonBuilder = TaxonBuilder.addAnno("OldName", oldName);
if (inputGenotypes.hasDepth()) {
for (int site = 0; site < inputGenotypes.positions().numberOfSites(); site++) {
int[] alleleDepthsAtSite = inputGenotypes.depthForAlleles(taxaList.indexOf(oldName), site);
for (int allele = 0; allele < NucleotideAlignmentConstants.NUMBER_NUCLEOTIDE_ALLELES; allele++) {
alleleDepths[allele][site] += alleleDepthsAtSite[allele];
}
}
}
if (nTaxaToMergeOrDelete == 1) {
taxonGenos = inputGenotypes.genotypeAllSites(taxaList.indexOf(oldName));
}
}
if (nTaxaToMergeOrDelete > 1) {
if (inputGenotypes.hasDepth()) {
taxonGenos = resolveGenosForTaxon(alleleDepths);
} else {
throw new IllegalStateException("\n\nERROR: Merging genotypes across replicate taxa is not allowed when there is no depth\n\n");
}
}
if (noDepthOutput()) {
genos.addTaxon(TaxonBuilder.build(), taxonGenos, null);
} else {
genos.addTaxon(TaxonBuilder.build(), alleleDepths, taxonGenos);
}
myLogger.info(" ...finished calling/adding genotypes for " + newName + " OldName(s):" + oldNames.deleteCharAt(oldNames.length() - 1).toString());
nTaxaAdded++;
}
myLogger.info("\nFinished adding genotypes for " + nTaxaAdded + " taxa. nTaxaDeleted=" + nTaxaDeleted);
return nTaxaAdded;
}
/**
 * Produces one genotype call per site from an allele-by-site depth matrix by handing each
 * site's depths to {@code genoMergeRule.callBasedOnDepth}.
 *
 * @param depthsForTaxon depths indexed as [allele][site]; must have at least one allele row
 * @return array with one call per site
 */
private byte[] resolveGenosForTaxon(int[][] depthsForTaxon) {
    final int alleleCount = depthsForTaxon.length;
    // One scratch buffer, refilled for every site.
    final int[] siteDepths = new int[alleleCount];
    final int siteCount = depthsForTaxon[0].length;
    final byte[] calls = new byte[siteCount];
    int s = 0;
    while (s < siteCount) {
        int a = 0;
        while (a < alleleCount) {
            siteDepths[a] = depthsForTaxon[a][s];
            a++;
        }
        calls[s] = genoMergeRule.callBasedOnDepth(siteDepths);
        s++;
    }
    return calls;
}
/**
 * Expands the "__DATE__" placeholder in a data set name to an underscore followed by the
 * value of the {@code date} field.
 *
 * @param dataSetName name that may contain "__DATE__"
 * @return the name with every "__DATE__" replaced
 */
private String parseDataSetName(String dataSetName) {
    final String dateSuffix = "_" + date;
    return dataSetName.replace("__DATE__", dateSuffix);
}
/**
 * Expands the placeholders in a data set description: "__SNPS__" becomes the number of sites
 * of the input genotypes, "__TAXA__" becomes {@code nTaxa}, and "__DATE__" becomes the value
 * of the {@code date} field (without a leading underscore).
 *
 * @param dataSetDescrip description that may contain the placeholders
 * @param nTaxa taxa count substituted for "__TAXA__"
 * @return the description with all placeholders replaced
 */
private String parseDataSetDescription(String dataSetDescrip, int nTaxa) {
    final String siteCount = String.valueOf(inputGenotypes.numberOfSites());
    String expanded = dataSetDescrip.replace("__SNPS__", siteCount);
    expanded = expanded.replace("__TAXA__", String.valueOf(nTaxa));
    return expanded.replace("__DATE__", date);
}
/**
 * Loads this plugin's icon from the class path.
 *
 * @return the icon, or null when the image resource cannot be found
 */
@Override
public ImageIcon getIcon() {
    final URL iconLocation = MergeRenameDeleteTaxaPlugin.class
            .getResource("/net/maizegenetics/analysis/images/lowDepthToMissing.gif");
    return (iconLocation != null) ? new ImageIcon(iconLocation) : null;
}
/**
 * Returns the label used for this plugin's button.
 *
 * @return the fixed text "MergeRenameTaxa"
 */
@Override
public String getButtonName() {
    final String buttonLabel = "MergeRenameTaxa";
    return buttonLabel;
}
/**
 * Returns the tool tip shown for this plugin.
 *
 * @return the fixed tool tip text
 */
@Override
public String getToolTipText() {
    final String toolTip = "Rename taxa, merging those with the same new name";
    return toolTip;
}
/**
 * Convenience method to run the plugin directly on a GenotypeTable instead of a DataSet.
 * The table is wrapped in a Datum named "inputGenotypes" inside a new DataSet, which is then
 * passed to {@link #runPlugin(DataSet)}. Nothing is returned.
 *
 * @param inputGenos Input GenotypeTable.
 */
public void runPlugin(GenotypeTable inputGenos) {
    final Datum wrappedGenotypes = new Datum("inputGenotypes", inputGenos, null);
    runPlugin(new DataSet(wrappedGenotypes, null));
}
/**
 * Convenience method to run the plugin: delegates to {@code performFunction} and discards
 * whatever it returns.
 *
 * @param input data set handed to {@code performFunction}
 */
public void runPlugin(DataSet input) {
    this.performFunction(input);
}
/**
 * Gets the Taxa Rename Key: the tab-delimited file with original and new taxa names.
 * Taxa with the same new name will be merged, taxa with the new name "delete" will be
 * removed, and any other columns (and the header line) are ignored.
 *
 * @return Taxa Rename Key
 */
public String taxaRenameKey() {
    final String renameKeyFile = taxaRenameKey.value();
    return renameKeyFile;
}
/**
 * Sets the Taxa Rename Key: the tab-delimited file with original and new taxa names.
 * Taxa with the same new name will be merged, taxa with the new name "delete" will be
 * removed, and any other columns (and the header line) are ignored.
 *
 * @param value Taxa Rename Key
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin taxaRenameKey(String value) {
    final PluginParameter<String> replacement = new PluginParameter<>(taxaRenameKey, value);
    taxaRenameKey = replacement;
    return this;
}
/**
 * Gets the Output HDF Genotypes parameter: the output HDF5 genotypes file.
 *
 * @return Output HDF5 genotypes file
 */
public String outputHDF5Genotypes() {
    final String outputFile = outputHDF5Genotypes.value();
    return outputFile;
}
/**
 * Sets the Output HDF Genotypes parameter: the output HDF5 genotypes file.
 *
 * @param value Output HDF5 genotypes file
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin outputHDF5Genotypes(String value) {
    final PluginParameter<String> replacement = new PluginParameter<>(outputHDF5Genotypes, value);
    outputHDF5Genotypes = replacement;
    return this;
}
/**
 * Gets the Avg Seq Error Rate: the average sequencing error rate per base, used to decide
 * between heterozygous and homozygous calls when merging taxa.
 *
 * @return Avg Seq Error Rate
 */
public Double avgSeqErrorRate() {
    final Double errorRate = avgSeqErrorRate.value();
    return errorRate;
}
/**
 * Sets the Avg Seq Error Rate: the average sequencing error rate per base, used to decide
 * between heterozygous and homozygous calls when merging taxa.
 *
 * @param value Avg Seq Error Rate
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin avgSeqErrorRate(Double value) {
    final PluginParameter<Double> replacement = new PluginParameter<>(avgSeqErrorRate, value);
    avgSeqErrorRate = replacement;
    return this;
}
/**
 * Gets the No Depth Output flag: when set, depths are not written to the output HDF5
 * genotypes file.
 *
 * @return No Depth Output
 */
public Boolean noDepthOutput() {
    final Boolean skipDepths = noDepthOutput.value();
    return skipDepths;
}
/**
 * Sets the No Depth Output flag: when set, depths are not written to the output HDF5
 * genotypes file.
 *
 * @param value No Depth Output
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin noDepthOutput(Boolean value) {
    final PluginParameter<Boolean> replacement = new PluginParameter<>(noDepthOutput, value);
    noDepthOutput = replacement;
    return this;
}
/**
 * Gets the Data set name: an optional short name to be added as a root level annotation
 * under "dataSetName".
 *
 * @return Data set name
 */
public String dataSetName() {
    final String shortName = dataSetName.value();
    return shortName;
}
/**
 * Sets the Data set name: an optional short name to be added as a root level annotation
 * under "dataSetName".
 *
 * @param value Data set name
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin dataSetName(String value) {
    final PluginParameter<String> replacement = new PluginParameter<>(dataSetName, value);
    dataSetName = replacement;
    return this;
}
/**
 * Gets the Data set description: an optional short description to be added as a root level
 * annotation under "dataSetDescription".
 *
 * @return Data set description
 */
public String dataSetDescription() {
    final String shortDescription = dataSetDescription.value();
    return shortDescription;
}
/**
 * Sets the Data set description: an optional short description to be added as a root level
 * annotation under "dataSetDescription".
 *
 * @param value Data set description
 *
 * @return this plugin
 */
public MergeRenameDeleteTaxaPlugin dataSetDescription(String value) {
    final PluginParameter<String> replacement = new PluginParameter<>(dataSetDescription, value);
    dataSetDescription = replacement;
    return this;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy