net.maizegenetics.analysis.imputation.CallParentAllelesPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
package net.maizegenetics.analysis.imputation;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.FilterGenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListBuilder;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.Datum;
import net.maizegenetics.plugindef.PluginEvent;
import org.apache.log4j.Appender;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import javax.swing.*;
import java.awt.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
public class CallParentAllelesPlugin extends AbstractPlugin {
private static final Logger myLogger = Logger.getLogger(CallParentAllelesPlugin.class);
private String pedfileName = null;
private int windowSize = 50; //the number of sites to be used a window for determining the original set of snps in LD
private double minRforSnps = 0.2; //the minimum R used to judge whether a snp is in ld with a test group
private double maxMissing = 0.9;
private double minMinorAlleleFrequency = -1.0;
private boolean useBCFilter = true;
private boolean useMultipleBCFilter = false;
private boolean useClusterAlgorithm = false;
private boolean checkSubPops = false;
private boolean useHets = true;
private boolean useWindowLD = false;
private int maxDifference = 0;
private int minUsedClusterSize = 5;
private double maxHetDev = 25;
private int overlap = -1;
private ArrayList familyList = null;
private boolean familyListNotSupplied = true; //used for testing
private boolean includeParents = false;
private boolean assignHaplotypesFromParents = false;
public CallParentAllelesPlugin(Frame parentFrame) {
super(parentFrame, false);
}
@Override
public DataSet performFunction(DataSet input) {
if (pedfileName == null && familyList == null) {
myLogger.error(getUsage());
return null;
}
List inputAlignments = input.getDataOfType(GenotypeTable.class);
LinkedList datumList = new LinkedList();
for (Datum d : inputAlignments) {
GenotypeTable align = (GenotypeTable) d.getData();
if (familyListNotSupplied)
familyList =
PopulationData.readPedigreeFile(pedfileName, align.chromosomeName(0), includeParents);
for (PopulationData family : familyList) {
myLogger.info("Calling parent alleles for family " + family.name + ", chromosome "
+ align.chromosomeName(0) + ".");
String[] ids = new String[family.members.size()];
family.members.toArray(ids);
myLogger.info("creating family alignment for family " + family.name);
TaxaList tL = new TaxaListBuilder().addAll(ids).build();
family.original = FilterGenotypeTable.getInstance(align, tL, false);
if (family.original.numberOfTaxa() == 0) {
myLogger.error("Genotype table for imputation of family " + family.name + " has zero taxa and will be skipped");
continue;
}
if (!useHets) {
byte NN = NucleotideAlignmentConstants.getNucleotideDiploidByte('N');
GenotypeTableBuilder builder =
GenotypeTableBuilder.getSiteIncremental(family.original.taxa());
int nsites = family.original.numberOfSites();
int ntaxa = family.original.numberOfTaxa();
for (int s = 0; s < nsites; s++) {
byte[] siteGeno = family.original.genotypeAllTaxa(s);
for (int t = 0; t < ntaxa; t++) {
if (GenotypeTableUtils.isHeterozygous(siteGeno[t]))
siteGeno[t] = NN;
}
builder.addSite(family.original.positions().get(s), siteGeno);
}
family.original = builder.build();
}
myLogger.info("family alignment created");
if (useWindowLD)
NucleotideImputationUtils.callParentAllelesByWindow(family, maxMissing, minMinorAlleleFrequency, windowSize, minRforSnps);
else if (useClusterAlgorithm)
NucleotideImputationUtils.callParentAllelesUsingClusters(family, maxMissing, minMinorAlleleFrequency, windowSize, checkSubPops);
else if (useBCFilter
&& (family.contribution1 == 0.75 || family.contribution1 == 0.25))
NucleotideImputationUtils.callParentAllelesByWindowForBackcrosses(family, maxMissing, minMinorAlleleFrequency, windowSize, minRforSnps);
else if (useMultipleBCFilter)
NucleotideImputationUtils.callParentAllelesByWindowForMultipleBC(family, maxMissing, 1, windowSize);
else if (assignHaplotypesFromParents) {
UseParentHaplotypes parentHaps = new UseParentHaplotypes(family);
parentHaps.assignHaplotypes();
}
else {
BiparentalHaplotypeFinder hapFinder = new BiparentalHaplotypeFinder(family);
if (overlap > -1)
hapFinder.overlap = overlap;
hapFinder.window = windowSize;
hapFinder.minR2 = minRforSnps;
hapFinder.maxHetDeviation = maxHetDev;
hapFinder.maxDifferenceScore = maxDifference;
hapFinder.minClusterSize = minUsedClusterSize;
hapFinder.minCoverage = 1 - maxMissing;
hapFinder.minMaf = minMinorAlleleFrequency;
hapFinder.assignHaplotyes();
hapFinder.convertGenotypesToParentCalls();
Datum hapDatum =
new Datum("clusters_" + family.name, hapFinder.getClusterReport(), "clusters for "
+ family.name);
datumList.add(hapDatum);
}
String comment =
"Parent Calls for family " + family.name + " from " + d.getName() + ".";
datumList.add(new Datum(family.name, family, comment));
}
}
DataSet resultDS = new DataSet(datumList, this);
fireDataSetReturned(new PluginEvent(resultDS, CallParentAllelesPlugin.class));
return resultDS;
}
@Override
public void setParameters(String[] args) {
if (args == null || args.length == 0) {
myLogger.error(getUsage());
return;
}
int narg = args.length;
for (int i = 0; i < narg; i++) {
if (args[i].equals("-p") || args[i].equalsIgnoreCase("-pedigrees")) {
pedfileName = args[++i];
}
else if (args[i].equals("-w") || args[i].equalsIgnoreCase("-windowSize")) {
windowSize = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-ol") || args[i].equalsIgnoreCase("-overlap")) {
overlap = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-r") || args[i].equalsIgnoreCase("-minR")) {
minRforSnps = Double.parseDouble(args[++i]);
}
else if (args[i].equals("-m") || args[i].equalsIgnoreCase("-maxMissing")) {
maxMissing = Double.parseDouble(args[++i]);
}
else if (args[i].equals("-f") || args[i].equalsIgnoreCase("-minMaf")) {
minMinorAlleleFrequency = Double.parseDouble(args[++i]);
}
else if (args[i].equals("-d") || args[i].equalsIgnoreCase("-maxHetDev")) {
maxHetDev = Double.parseDouble(args[++i]);
}
else if (args[i].equals("-mh") || args[i].equalsIgnoreCase("-minHap")) {
minUsedClusterSize = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-md") || args[i].equalsIgnoreCase("-maxDiff")) {
maxDifference = Integer.parseInt(args[++i]);
}
else if (args[i].equals("-b") || args[i].equalsIgnoreCase("-bc1")) {
String param = args[++i];
if (param.toUpperCase().startsWith("F"))
useBCFilter = false;
}
else if (args[i].equals("-n") || args[i].equalsIgnoreCase("-bcn")) {
String param = args[++i];
if (param.toUpperCase().startsWith("T"))
useMultipleBCFilter = true;
}
else if (args[i].equals("-l") || args[i].equalsIgnoreCase("-logconfig")) {
addFileLogger(args[++i]);
}
else if (args[i].equals("-logfile")) {
setFileLogger(args[++i]);
}
else if (args[i].startsWith("-clust")) {
useClusterAlgorithm = true;
}
else if (args[i].toLowerCase().equals("-windowld")) {
useWindowLD = true;
}
else if (args[i].equals("-subpops")) {
checkSubPops = true;
}
else if (args[i].equals("-nohets")) {
useHets = false;
}
else if (args[i].equals("-addParents")) {
includeParents = true;
}
else if (args[i].equals("-useParentHaps")) {
assignHaplotypesFromParents = true;
includeParents = true;
}
else if (args[i].equals("?"))
myLogger.info(getUsage());
}
}
private void addFileLogger(String filename) {
try {
Appender fileAppender = Logger.getRootLogger().getAppender("fileAppender");
if (fileAppender == null) {
FileAppender filelog =
new FileAppender(new PatternLayout("%d %-5p [%c{1}] %m %n"), filename, true);
filelog.setName("fileAppender");
Logger.getRootLogger().addAppender(filelog);
}
} catch (Exception e) {
myLogger.info("log file could not be instantiated");
e.printStackTrace();
}
}
private void setFileLogger(String filename) {
try {
Appender fileAppender = Logger.getRootLogger().getAppender("fileAppender");
if (fileAppender != null)
Logger.getRootLogger().removeAppender(fileAppender);
FileAppender filelog =
new FileAppender(new PatternLayout("%d %-5p [%c{1}] %m %n"), filename, true);
filelog.setName("fileAppender");
Logger.getRootLogger().addAppender(filelog);
} catch (Exception e) {
myLogger.info("log file could not be instantiated");
e.printStackTrace();
}
}
public void setPedfileName(String pedfileName) {
this.pedfileName = pedfileName;
}
public void setWindowSize(int windowSize) {
this.windowSize = windowSize;
}
public void setMinRforSnps(double minRforSnps) {
this.minRforSnps = minRforSnps;
}
public void setMaxDifference(int maxDifference) {
this.maxDifference = maxDifference;
}
public void setMinUsedClusterSize(int minUsedClusterSize) {
this.minUsedClusterSize = minUsedClusterSize;
}
public void setCheckSubPops(boolean checkSubPops) {
this.checkSubPops = checkSubPops;
}
public void setMaxMissing(double maxMissing) {
this.maxMissing = maxMissing;
}
public void setMinMinorAlleleFrequency(double minMinorAlleleFrequency) {
this.minMinorAlleleFrequency = minMinorAlleleFrequency;
}
public void setUseBCFilter(boolean useBCFilter) {
this.useBCFilter = useBCFilter;
}
public void setUseMultipleBCFilter(boolean useMultipleBCFilter) {
this.useMultipleBCFilter = useMultipleBCFilter;
}
public void setUseClusterAlgorithm(boolean useClusterAlgorithm) {
this.useClusterAlgorithm = useClusterAlgorithm;
}
public void setUseHets(boolean useHets) {
this.useHets = useHets;
}
public void setUseWindowLD(boolean useWindowLD) {
this.useWindowLD = useWindowLD;
}
public void setMaxHetDev(double maxHetDev) {
this.maxHetDev = maxHetDev;
}
public void setOverlap(int overlap) {
this.overlap = overlap;
}
public void setLogFile(String logname) {
setFileLogger(logname);
}
public void includeParents(boolean include) {
includeParents = true;
}
public void haplotypesFromParents(boolean useparents) {
assignHaplotypesFromParents = useparents;
}
@Override
public ImageIcon getIcon() {
return null;
}
@Override
public String getButtonName() {
return "Call Parents";
}
@Override
public String getToolTipText() {
return null;
}
public String getUsage() {
StringBuilder usage =
new StringBuilder("The CallParentAllelesPlugin requires the following parameter:\n");
usage.append("-p or -pedigrees : a file containing pedigrees of the individuals to be imputed\n");
usage.append("The following parameters are optional:\n");
usage.append("-w or -windowSize : the number of SNPs to examine for LD clusters (default = 50)\n");
usage.append("-r or -minR : minimum R used to filter SNPs on LD (default = 0.2, use 0 for no ld filter)\n");
usage.append("-m or -maxMissing : maximum proportion of missing data allowed for a SNP (default = 0.9)\n");
usage.append("-f or -minMaf : minimum minor allele frequency used to filter SNPs. If negative, filters on expected segregation ratio from parental contribution (default = -1)\n");
usage.append("-d or -maxHetDev : filter sites on maximum heterozygosity, max heterozygosity = maxHetDev * sd of percent het + mean percent het (default = 5)\n");
usage.append("-mh or -minHap : haplotypes seen fewer than minHap times will not be used to infer parental haplotypes (default = 5)\n");
usage.append("-d or -maxDiff : maximum allowable number of allele differences for treating two haplotypes as equivalent (default = 0)\n");
usage.append("-b or -bc1 : use BC1 specific filter (default = true)\n");
usage.append("-n or -bcn : use multipe backcross specific filter (default = false)\n");
usage.append("-logfile : the name of a file to which all logged messages will be printed.\n");
usage.append("-cluster : use the cluster algorithm. minMaf defaults to 0.05.\n");
usage.append("-windowld : use the window LD method to impute parent haplotypes.\n");
usage.append("-subpops : filter sites for heterozygosity in subpopulations. Only used for -cluster option.\n");
usage.append("-nohets : delete het calls from original data before imputing.\n");
usage.append("? : print the parameter list.\n");
return usage.toString();
}
public void setFamilyList(ArrayList familyList) {
this.familyList = familyList;
familyListNotSupplied = false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy