All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.gbs.repgen.RGBSProductionSNPCallerPlugin Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate trait associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
/*
 * ProductionSNPCallerPlugin
 */
package net.maizegenetics.analysis.gbs.repgen;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import net.maizegenetics.analysis.gbs.Barcode;
import net.maizegenetics.analysis.gbs.v2.GBSUtils;
import net.maizegenetics.dna.BaseEncoder;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.snp.*;
import net.maizegenetics.dna.snp.score.AlleleDepthUtil;
import net.maizegenetics.dna.snp.genotypecall.BasicGenotypeMergeRule;
import net.maizegenetics.dna.snp.genotypecall.GenotypeMergeRule;
import net.maizegenetics.dna.tag.Tag;
import net.maizegenetics.dna.tag.TagBuilder;
import net.maizegenetics.dna.tag.TagData;
import net.maizegenetics.dna.tag.TagDataSQLite;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListIOUtils;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.DirectoryCrawler;
import net.maizegenetics.util.Utils;
import org.ahocorasick.trie.Emit;
import org.ahocorasick.trie.Trie;
import org.apache.log4j.Logger;

import javax.swing.*;
import java.awt.*;
import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.List;
import java.util.concurrent.atomic.LongAdder;
import java.util.stream.Collectors;

/**
 * This plugin converts all of the fastq (and/or qseq) files in the input folder
 * and keyfile to genotypes and adds these to a genotype file in HDF5 format.
 *
 * We refer to this step as the "Production Pipeline".
 *
 * The output format is either HDF5 or VCF genotypes with allelic depth stored. 
 * Output file type is determined by presence of the ".h5" suffix.  SNP calling is
 * quantitative with the option of using either the Glaubitz/Buckler binomial
 * method (pHet/pErr > 1 = het) (=default), or the Stacks method.
 *
 * Merging of samples with the same LibraryPrepID is handled by
 * GenotypeTableBuilder.addTaxon(), with the genotypes re-called based upon the
 * new depths. Therefore, if you want to keep adding genotypes to the same
 * target HDF5 file in subsequent runs, use the -ko (keep open) option so that
 * the output GenotypeTableBuilder will be mutable, using closeUnfinished()
 * rather than build().
 *
 * If the target output is HDF5, and that GenotypeTable file doesn't exist, it will be
 * created.  
 *
 * Each taxon in the output file is named "ShortName:LibraryPrepID" and is
 * annotated with "Flowcell_Lanes" (=source seq data for current genotype).
 *
 * Requires a database with variants added from a previous "Discovery Pipeline" run.
 * 
 * References to "tag" are being replaced by references to "kmer" as the pipeline
 * is really a kmer alignment process.
 *
 * TODO add the Stacks likelihood method to BasicGenotypeMergeRule
 *
 * @author Ed Buckler
 * @author Jeff Glaubitz
 */
public class RGBSProductionSNPCallerPlugin extends AbstractPlugin {

    private static final Logger myLogger = Logger.getLogger(RGBSProductionSNPCallerPlugin.class);

    // ----- User-facing plugin parameters -----
    // Each parameter carries its value type so that value() returns that type rather than Object.
    private PluginParameter<String> myInputDir = new PluginParameter.Builder<>("i", null, String.class).guiName("Input Directory").required(true).inDir()
            .description("Input directory containing fastq AND/OR qseq files.").build();
    private PluginParameter<String> myKeyFile = new PluginParameter.Builder<>("k", null, String.class).guiName("Key File").required(true).inFile()
            .description("Key file listing barcodes distinguishing the samples").build();
    private PluginParameter<String> myInputDB = new PluginParameter.Builder<>("db", null, String.class).guiName("Input GBS Database").required(true).inFile()
            .description("Input Database file if using SQLite").build();
    private PluginParameter<String> myOutputGenotypes = new PluginParameter.Builder<>("o", null, String.class).guiName("Output Genotypes File").required(true).outFile()
            .description("Output (target) genotypes file to produce.  Default output file type is VCF.  If file suffix is .h5, an hdf5 file will be created instead.")
            .build();
    private PluginParameter<Double> myAveSeqErrorRate = new PluginParameter.Builder<>("eR", 0.01, Double.class).guiName("Ave Seq Error Rate")
            .description("Average sequencing error rate per base (used to decide between heterozygous and homozygous calls)").build();
    private PluginParameter<Integer> myMaxDivergence = new PluginParameter.Builder<>("d", 0, Integer.class).guiName("Max Divergence")
            .description("Maximum divergence (edit distance) between new read and previously mapped read (Default: 0 = perfect matches only)").build();
    private PluginParameter<Boolean> myDepthOutput = new PluginParameter.Builder<>("do", true, Boolean.class).guiName("Write Depths to Output")
            .description("Depth output: True means write depths to the output hdf5 genotypes file, false means do NOT write depths to the hdf5 file").build();
    private PluginParameter<Integer> myKmerLength = new PluginParameter.Builder<>("kmerLength", 64, Integer.class).guiName("Maximum Kmer Length")
            .description("Length of kmers to process").build();
    private PluginParameter<Double> posQualityScore = new PluginParameter.Builder<>("minPosQS", 0.0, Double.class).guiName("Minimun snp quality score")
            .description("Minimum quality score for snp position to be included").build();
    private PluginParameter<Integer> myBatchSize = new PluginParameter.Builder<>("batchSize", 8, Integer.class).guiName("Batch size of fastq files").required(false)
            .description("Number of flow cells being processed simultaneously").build();
    private PluginParameter<Integer> myMinQualScore = new PluginParameter.Builder<>("mnQS", 0, Integer.class).guiName("Minimum quality score").required(false)
            .description("Minimum quality score within the barcode and read length to be accepted").build();
    //private PluginParameter<Boolean> myStacksLikelihood = new PluginParameter.Builder<>("sL", false, Boolean.class).guiName("Use Stacks Likelihood")
    //        .description("Use STACKS likelihood method to call heterozygotes (default: use tasselGBS likelihood ratio method)").build();

    // ----- Run-time state -----
    // Parent directory of the output genotypes file.
    private String myOutputDir = null;
    // Output format flag; static because the static helper that creates the genotype table builder reads it.
    private static boolean isHDF5 = false; // default is VCF
    private TagData tagDataReader = null;
    // Tags matched for each taxon in the batch currently being processed; synchronized because
    // the sequence files of a batch are processed in parallel.
    Multimap<Taxon, Tag> tagCntMap=Multimaps.synchronizedMultimap(ArrayListMultimap.create(384, 500_000));
    private Set<String> seqFilesInKeyAndDir = new TreeSet<>(); // fastq (or qseq) file names present in input directory that have a "Flowcell_Lane" in the key file

    protected static int readEndCutSiteRemnantLength;
    private Trie ahoCorasickTrie; // import from ahocorasick-0.2.1.jar
    private String[] likelyReadEndStrings;

    //Documentation of read depth per sample (one recorded per replicate)
    // Treemap is synchronized as multiple threads may increment values.
    private Map<String, Integer> rawReadCountsMap = new TreeMap<>();
    private Map<String, Integer> rawReadCountsForFullSampleName = Collections.synchronizedMap(rawReadCountsMap);
    private Map<String, Integer> matchedReadCountsMap = new TreeMap<>();
    private Map<String, Integer> matchedReadCountsForFullSampleName = Collections.synchronizedMap(matchedReadCountsMap);

    // Rule used to turn allele depths into genotype calls.
    private GenotypeMergeRule genoMergeRule = null;
    // Raised when a read shorter than the kmer length is encountered while processing a sequence file.
    private boolean taglenException;

    /**
     * Creates the plugin with no parent frame and in non-interactive mode.
     */
    public RGBSProductionSNPCallerPlugin() {
        super(null, false);
    }

    /**
     * Creates the plugin, forwarding both arguments to the superclass constructor.
     *
     * @param parentFrame parent frame handed to the superclass
     * @param isInteractive interactive flag handed to the superclass
     */
    public RGBSProductionSNPCallerPlugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    /**
     * Derives run-time state from the user supplied parameters: the directory
     * of the output file, the genotype merge rule built from the average
     * sequencing error rate, and the output format flag.
     *
     * @throws IllegalStateException if the canonical path of the output file
     *         cannot be resolved
     */
    @Override
    public void postProcessParameters() {
        try {
            myOutputDir = (new File(outputGenotypesFile())).getCanonicalFile().getParent();
        } catch (IOException e) {
            // Attach the original exception as the cause so its stack trace is not lost.
            throw new IllegalStateException("Problem resolving output directory:" + e, e);
        }
        genoMergeRule = new BasicGenotypeMergeRule(aveSeqErrorRate());

        // isHDF5 is static, so it has to be assigned on every run rather than only
        // ever switched on; otherwise a ".h5" run would carry over into a later VCF run.
        isHDF5 = !myOutputGenotypes.isEmpty() && outputGenotypesFile().endsWith(".h5");
    }

    /**
     * Calls genotypes for every sequence file in the input directory that is
     * listed in the key file and writes them to the output genotypes file.
     *
     * The tags, SNP positions and tag-to-allele map are read from the input
     * database. Sequence files are then processed in batches of
     * {@link #batchSize()} files; the files of one batch are read in parallel
     * and their matched tags are collected per taxon, after which genotypes
     * are called for each taxon of the batch.
     *
     * @param input not used
     * @return always {@code null}; results are written to {@link #outputGenotypesFile()}
     * @throws IllegalArgumentException if the batch size is smaller than 1, or
     *         if the key file maps one file name to more than one taxon
     */
    @Override
    public DataSet processData(DataSet input) {
        final int batchSize = batchSize();
        if (batchSize < 1) {
            // Zero would divide by zero below; a negative value would never advance the batch loop.
            throw new IllegalArgumentException("Batch size must be at least 1, but was " + batchSize);
        }

        List<Path> directoryFiles = DirectoryCrawler.listPaths(GBSUtils.inputFileGlob, Paths.get(inputDirectory()).toAbsolutePath());
        if (directoryFiles.isEmpty()) {
            myLogger.warn("No files matching:" + GBSUtils.inputFileGlob);
            return null;
        }

        tagDataReader = new TagDataSQLite(inputGBSDatabase());
        TaxaList masterTaxaList = TaxaListIOUtils.readTaxaAnnotationFile(keyFile(), GBSUtils.sampleNameField, new HashMap<>(), true);
        Map<String, Taxon> fileTaxaMap = TaxaListIOUtils.getUniqueMapOfTaxonByAnnotation(masterTaxaList, GBSUtils.fileNameField)
                .orElseThrow(() -> new IllegalArgumentException("Error: Same file points more than one taxon in the KeyFile"));
        // Only files that the key file knows about are processed.
        List<Path> inputSeqFiles = directoryFiles.stream()
                .peek(path -> System.out.println(path.getFileName().toString()))
                .filter(path -> fileTaxaMap.containsKey(path.getFileName().toString()))
                .collect(Collectors.toList());
        if (inputSeqFiles.isEmpty()) {
            return null; // no files to process
        }
        writeInitialTaxaReadCounts(masterTaxaList); // initialize synchronized maps

        //todo perhaps subset the masterTaxaList based on the files in there, but it seems like it will all be figure out.
        // Maps every database tag onto itself so that a tag built from a read can be
        // swapped for the stored instance (canonicalize them OR eventually we will use a Trie).
        Map<Tag, Tag> canonicalTags = new HashMap<>();
        tagDataReader.getTags().forEach(t -> canonicalTags.put(t, t));

        int batchNum = inputSeqFiles.size() / batchSize;
        if (inputSeqFiles.size() % batchSize != 0) {
            batchNum++;
        }
        System.out.println("ProductionSNPCallerPluginV2: Total batches to process: " + batchNum);

        final PositionList positionList = tagDataReader.getSNPPositions(positionQualityScore());
        if (positionList == null || positionList.size() == 0) {
            String errMsg = "\nNo snp positons found with quality score of " + positionQualityScore() + ".\n"
                    + "Please run UpdateSNPPositionQualityPlugin to add quality scores for your positions,\n"
                    + " then select snp positions within a quality range you have specified.\n";
            myLogger.error(errMsg);
            return null;
        }

        GenotypeTableBuilder gtb = setUpGenotypeTableBuilder(outputGenotypesFile(), positionList, genoMergeRule);
        final Multimap<Tag, AlleleWithPosIndex> tagsToIndex = ArrayListMultimap.create();
        tagDataReader.getAlleleMap().entries().forEach(e -> {
            // indexOf returns -1 if the list doesn't contain the element, which it won't
            // if there are snpposition entries with a quality score less than the minimum
            int posIndex = positionList.indexOf(e.getValue().position());
            if (posIndex >= 0) {
                tagsToIndex.put(e.getKey(), new AlleleWithPosIndex(e.getValue(), posIndex));
            }
        });

        // Typed local handle on the per-batch collection of matched tags.
        final Multimap<Taxon, Tag> tagsByTaxon = tagCntMap;
        taglenException = false;
        for (int idx = 0; idx < inputSeqFiles.size(); idx += batchSize) {
            tagsByTaxon.clear(); // start fresh with each new batch
            int end = Math.min(idx + batchSize, inputSeqFiles.size());
            List<Path> batch = inputSeqFiles.subList(idx, end);
            System.out.println("\nStart processing batch " + (idx / batchSize + 1));
            batch.parallelStream().forEach(inputSeqFile -> {
                try {
                    int taxaIndex = masterTaxaList.indexOf(fileTaxaMap.get(inputSeqFile.getFileName().toString()));
                    processFastQ(inputSeqFile, taxaIndex, masterTaxaList, canonicalTags, kmerLength(), minimumQualityScore());
                } catch (StringIndexOutOfBoundsException oobe) {
                    myLogger.error(oobe.getMessage(), oobe);
                    setTagLenException();
                }
            });
            if (taglenException) {
                return null; // Tag length failure from processFastQ - halt processing
            }

            // The parallel stream has completed here, so the map is read by this thread only.
            for (Map.Entry<Taxon, Collection<Tag>> taxonTags : tagsByTaxon.asMap().entrySet()) {
                callGenotypes(taxonTags.getKey(), taxonTags.getValue(), tagsToIndex, positionList, genoMergeRule, gtb, depthToOutput());
            }
            System.out.println("\nFinished processing batch " + (idx / batchSize + 1));
        }

        GenotypeTable myGt = gtb.build();
        if (!isHDF5) {
            // A VCF file is only wanted when the target is not an HDF5 file.
            // NOTE(review): assumes build() is what finalises the HDF5 target created by
            // setUpGenotypeTableBuilder - confirm against GenotypeTableBuilder.
            ExportUtils.writeToVCF(myGt, outputGenotypesFile(), depthToOutput());
        }
        return null;
    }

    /**
     * Calls the genotypes of one taxon and adds the taxon to the genotype table builder.
     *
     * Every tag is expanded to the alleles it supports; each supported allele
     * increments the depth counter for its allele and position index. The
     * genotypes are then called from those depths.
     *
     * @param taxon taxon the tags were observed in
     * @param tags tags matched for the taxon; a tag seen several times appears several times
     * @param tagsToIndex alleles (with their position index) supported by each tag
     * @param positionList positions being genotyped; sizes the depth matrix
     * @param genoMergeRule rule used to call a genotype from allele depths
     * @param gtb builder the taxon is added to
     * @param outputDepths when true the depths are stored along with the genotypes
     */
    private static void callGenotypes(Taxon taxon, Collection<Tag> tags, Multimap<Tag, AlleleWithPosIndex> tagsToIndex,
                   PositionList positionList, GenotypeMergeRule genoMergeRule, GenotypeTableBuilder gtb, boolean outputDepths) {
        // Indexed as [allele][site].
        int[][] alleleDepths = new int[NucleotideAlignmentConstants.NUMBER_NUCLEOTIDE_ALLELES][positionList.numberOfSites()];
        for (Tag tag : tags) {
            for (AlleleWithPosIndex supported : tagsToIndex.get(tag)) {
                alleleDepths[supported.allele()][supported.positionIndex()]++;
            }
        }
        byte[] genotypes = resolveGenosForTaxon(alleleDepths, genoMergeRule);
        if (outputDepths) {
            gtb.addTaxon(taxon, genotypes, AlleleDepthUtil.depthIntToByte(alleleDepths));
        } else {
            gtb.addTaxon(taxon, genotypes);
        }
    }

    private class AlleleWithPosIndex extends SimpleAllele {
        private int positionIndex;

        private AlleleWithPosIndex(Allele myAllele, int positionIndex) {
            super(myAllele.allele(), myAllele.position());
            this.positionIndex=positionIndex;
        }

        public int positionIndex() {
            return positionIndex;
        }
    }

    private class CountOfReadQuality {
        LongAdder allReads=new LongAdder();
        LongAdder goodBarcodedReads=new LongAdder();
        LongAdder goodMatched=new LongAdder();
        LongAdder perfectMatches=new LongAdder();
        LongAdder imperfectMatches=new LongAdder();
        LongAdder singleImperfectMatches=new LongAdder();
    }

//    private void processFastQ(Path fastqFile, int taxaIndex, TaxaList masterTaxaList,
//                              TagDistributionMap masterTagTaxaMap, int preferredTagLength, int minQual) throws StringIndexOutOfBoundsException{

    /**
     * Reads one sequence file and records, for its taxon, every read whose
     * leading {@code preferredTagLength} bases match a known tag.
     *
     * Reads whose first low quality base lies within the first
     * {@code preferredTagLength} bases are skipped when {@code minQual} is
     * greater than zero. Matching tags are added to {@code tagCntMap} under
     * the taxon at {@code taxaIndex}.
     *
     * @param fastqFile sequence file to read
     * @param taxaIndex index in {@code masterTaxaList} of the taxon this file belongs to
     * @param masterTaxaList all taxa from the key file
     * @param canonicalTags known tags, each mapped onto itself
     * @param preferredTagLength number of leading bases of each read used to build the tag
     * @param minQual minimum quality score; 0 disables the quality filter
     * @throws StringIndexOutOfBoundsException if a read is shorter than
     *         {@code preferredTagLength}; the caller uses this to halt the run
     */
    private void processFastQ(Path fastqFile, int taxaIndex, TaxaList masterTaxaList, Map<Tag, Tag> canonicalTags,
            int preferredTagLength, int minQual) throws StringIndexOutOfBoundsException {
        int allReads = 0, goodBarcodedReads = 0, lowQualityReads = 0;
        try {
            int qualityScoreBase = GBSUtils.determineQualityScoreBase(fastqFile);
            // try-with-resources closes the reader on every exit path, including failures.
            try (BufferedReader br = Utils.getBufferedReader(fastqFile.toString(), 1 << 22)) {
                long time = System.nanoTime();
                String[] seqAndQual;
                Taxon taxon = masterTaxaList.get(taxaIndex);
                while ((seqAndQual = GBSUtils.readFastQBlock(br, allReads)) != null) {
                    allReads++;

                    if (minQual > 0) {
                        //todo move getFirstLowQualityPos into this class?
                        if (BaseEncoder.getFirstLowQualityPos(seqAndQual[1], minQual, qualityScoreBase) < preferredTagLength) {
                            lowQualityReads++;
                            continue;
                        }
                    }
                    if (seqAndQual[0].length() < preferredTagLength) {
                        String errMsg = "\n\nERROR processing " + fastqFile.toString() + "\n" +
                                "Reading entry number " + allReads + " fails the length test.\n" +
                                "Sequence length " + seqAndQual[0].length() +
                                " is less than kmerLength " + preferredTagLength + ".\n" +
                                "Re-run your files with either a shorter kmerLength value or a higher minimum quality score.\n";
                        throw new StringIndexOutOfBoundsException(errMsg);
                    }

                    Tag tag = TagBuilder.instance(seqAndQual[0].substring(0, preferredTagLength)).build();
                    if (tag == null) continue;   //null occurs when any base was not A, C, G, T
                    goodBarcodedReads++;
                    Tag canonicalTag = canonicalTags.get(tag);
                    if (canonicalTag != null) {
                        tagCntMap.put(taxon, canonicalTag);
                    }
                    if (allReads % 1000000 == 0) {
                        myLogger.info("Total Reads:" + allReads + " Reads with barcode and cut site overhang:" + goodBarcodedReads
                                + " rate:" + (System.nanoTime() - time) / allReads + " ns/read");
                    }
                }
                myLogger.info("Total number of reads in lane=" + allReads);
                myLogger.info("Total number of good barcoded reads=" + goodBarcodedReads);
                myLogger.info("Total number of low quality reads=" + lowQualityReads);
                myLogger.info("Timing process (sorting, collapsing, and writing TagCount to file).");
                myLogger.info("Process took " + (System.nanoTime() - time) / 1e6 + " milliseconds for file " + fastqFile.toString());
            }
        } catch (StringIndexOutOfBoundsException oobe) {
            // Must reach the caller: it is the signal to stop the whole run. Without this
            // clause the general handler below would swallow it and processing would go on.
            throw oobe;
        } catch (Exception e) {
            // Any other failure is reported and this file is abandoned (best effort).
            myLogger.error("Good Barcodes Read: " + goodBarcodedReads, e);
        }
    }



    /**
     * Logs a one-line progress summary at info level.
     *
     * @param counters counts to report; elements 0, 1 and 2 are read
     * @param readSeqReadTime cumulative read time in nanoseconds, reported in seconds
     * @param ifRRNotNullTime cumulative processing time in nanoseconds, reported in seconds
     */
    private void reportProgress(int[] counters, long readSeqReadTime, long ifRRNotNullTime) {
        final StringBuilder progress = new StringBuilder();
        progress.append("totalReads:").append(counters[0]);
        progress.append("  goodBarcodedReads:").append(counters[1]);
        progress.append("  goodMatchedToTOPM:").append(counters[2]);

        // Both timings arrive in nanoseconds and are shown in seconds.
        final double readSeconds = (double) (readSeqReadTime) / 1_000_000_000.0;
        final double processSeconds = (double) (ifRRNotNullTime) / 1_000_000_000.0;
        progress.append("  cumulReadSequenceTime: ").append(readSeconds).append(" sec");
        progress.append("  cumulProcessSequenceTime: ").append(processSeconds).append(" sec");

        myLogger.info(progress.toString());
    }

    /**
     * Logs the final read counts for one sequence file at info level.
     *
     * @param fileName sequence file that was just read
     * @param counters counts to report; elements 0, 1 and 2 are read
     * @param nFilesProcessed number of sequence files finished so far
     */
    private void reportTotals(Path fileName, int[] counters, int nFilesProcessed) {
        final int readsInLane = counters[0];
        myLogger.info("Total number of reads in lane=" + readsInLane);

        final int goodBarcoded = counters[1];
        myLogger.info("Total number of good, barcoded reads=" + goodBarcoded);

        final int matchedToTopm = counters[2];
        myLogger.info("Total number of good, barcoded reads matched to the TOPM=" + matchedToTopm);

        final int expectedFiles = seqFilesInKeyAndDir.size();
        myLogger.info("Finished reading " + nFilesProcessed + " of " + expectedFiles + " sequence files: " + fileName + "\n");
    }


    /**
     * Creates the genotype table builder that matches the requested output format.
     *
     * For VCF output a taxa-incremental builder is returned. For HDF5 output
     * the builder either merges into the existing file or, when the file does
     * not exist yet, is set up to create it.
     *
     * @param anOutputFile path of the output genotypes file
     * @param positionList positions the genotypes are called for
     * @param mergeRule rule handed to the builder
     * @return builder to which the taxa are added
     */
    private static GenotypeTableBuilder setUpGenotypeTableBuilder(String anOutputFile, PositionList positionList, GenotypeMergeRule mergeRule) {
        if (!isHDF5) {
            // create genotype table for VCF
            final GenotypeTableBuilder vcfBuilder = GenotypeTableBuilder.getTaxaIncremental(positionList, mergeRule);
            myLogger.info("\nOutput VCF file: \n" + anOutputFile +
                    " \ncreated for genotypes from this run.");
            return vcfBuilder;
        }

        final boolean targetExists = new File(anOutputFile).exists();
        if (targetExists) {
            myLogger.info("\nGenotypes will be added to existing HDF5 file:\n  " + anOutputFile + "\n");
            return GenotypeTableBuilder.mergeTaxaIncremental(anOutputFile, mergeRule);
        }

        myLogger.info("\nThe target HDF5 file:\n  " + anOutputFile
                + "\ndoes not exist. A new HDF5 file of that name will be created \nto hold the genotypes from this run.");
        return GenotypeTableBuilder.getTaxaIncrementalWithMerging(anOutputFile, positionList, mergeRule);
    }

    /**
     * Calls one genotype per site from the allele depths of a single taxon.
     *
     * @param depthsForTaxon depths indexed as [allele][site]
     * @param genoMergeRule rule that turns the depths at one site into a genotype call
     * @return genotype call for every site, in site order
     */
    private static byte[] resolveGenosForTaxon(int[][] depthsForTaxon, GenotypeMergeRule genoMergeRule) {
        final int alleleCount = depthsForTaxon.length;
        // One scratch buffer holds the depths of the site being called and is reused for every site.
        final int[] siteDepths = new int[alleleCount];
        final int siteCount = depthsForTaxon[0].length;
        final byte[] calls = new byte[siteCount];

        int site = 0;
        while (site < siteCount) {
            int allele = 0;
            while (allele < alleleCount) {
                siteDepths[allele] = depthsForTaxon[allele][site];
                allele++;
            }
            calls[site] = genoMergeRule.callBasedOnDepth(siteDepths);
            site++;
        }
        return calls;
    }

    
    /**
     * Registers every taxon name with a count of 0 in both synchronized
     * read-count maps (raw and matched).
     *
     * @param tl taxa whose names are registered
     */
    private void writeInitialTaxaReadCounts(TaxaList tl) {
        for (Taxon entry : tl) {
            final String sampleName = entry.getName();
            rawReadCountsForFullSampleName.put(sampleName, 0);
            matchedReadCountsForFullSampleName.put(sampleName, 0);
        }
    }

    /**
     * Raises the tag-length failure flag. processData() calls this when
     * processFastQ() throws a StringIndexOutOfBoundsException, and checks the
     * flag after each batch to stop processing.
     */
    public void setTagLenException() {
        taglenException = true;
    }
    
    /**
     * Logs, at error level, the accepted naming conventions for raw sequence
     * files followed by the file name that was actually found.
     *
     * @param actualFileName file name that was found
     */
    private void printFileNameConventions(String actualFileName) {
        final String[] acceptedNames = {
            "\n       FLOWCELL_LANE_fastq.gz",
            "\n       FLOWCELL_s_LANE_fastq.gz",
            "\n       code_FLOWCELL_s_LANE_fastq.gz",
            "\n       FLOWCELL_LANE_fastq.txt.gz",
            "\n       FLOWCELL_s_LANE_fastq.txt.gz",
            "\n       code_FLOWCELL_s_LANE_fastq.txt.gz",
            "\n       FLOWCELL_LANE_qseq.txt.gz",
            "\n       FLOWCELL_s_LANE_qseq.txt.gz",
            "\n       code_FLOWCELL_s_LANE_qseq.txt.gz"
        };

        final StringBuilder text = new StringBuilder();
        text.append("\n\n");
        text.append("Error in parsing file name:");
        text.append("\n   The raw sequence filename does not contain either 3, 4, or 5 underscore-delimited values.");
        text.append("\n   Acceptable file naming conventions include the following (where FLOWCELL indicates the flowcell name and LANE is an integer):");
        for (String accepted : acceptedNames) {
            text.append(accepted);
        }
        text.append("\n");
        text.append("\n   Actual Filename: ").append(actualFileName);
        text.append("\n\n");

        myLogger.error(text.toString());
    }

    /**
     * @return always null; this plugin supplies no icon
     */
    @Override
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * @return the fixed button label "Production SNP Caller"
     */
    @Override
    public String getButtonName() {
        return "Production SNP Caller";
    }

    /**
     * @return the fixed tool tip text "Production SNP Caller"
     */
    @Override
    public String getToolTipText() {
        return "Production SNP Caller";
    }

    // The following getters and setters were auto-generated.
    // Please use this method to re-generate.
    //
    // public static void main(String[] args) {
    //     GeneratePluginCode.generate(ProductionSNPCallerPluginV2.class);
    // }

    /**
     * Convenience method to run plugin with one return object.
     *
     * @param input data set handed to performFunction
     * @return the first datum of the result cast to TagData, or {@code null}
     *         when performFunction produces no result
     */
    // TODO: Replace TagData with the specific type this plugin produces, if any.
    public TagData runPlugin(DataSet input) {
        // processData() in this class returns null on every path, so the result must be
        // checked before it is dereferenced.
        // NOTE(review): presumably performFunction hands back what processData returns -
        // confirm in AbstractPlugin.
        DataSet result = performFunction(input);
        if (result == null) {
            return null;
        }
        return (TagData) result.getData(0).getData();
    }

    /**
     * Input directory containing fastq AND/OR qseq files. processData()
     * searches this directory for sequence files.
     *
     * @return Input Directory
     */
    public String inputDirectory() {
        return myInputDir.value();
    }

    /**
     * Set Input Directory. Input directory containing fastq
     * AND/OR qseq files. The parameter object is replaced by a
     * copy that carries the new value.
     *
     * @param value Input Directory
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin inputDirectory(String value) {
        myInputDir = new PluginParameter<>(myInputDir, value);
        return this;
    }

    /**
     * Key file listing barcodes distinguishing the samples. processData()
     * reads the taxa annotations from this file.
     *
     * @return Key File
     */
    public String keyFile() {
        return myKeyFile.value();
    }

    /**
     * Set Key File. Key file listing barcodes distinguishing
     * the samples. The parameter object is replaced by a copy
     * that carries the new value.
     *
     * @param value Key File
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin keyFile(String value) {
        myKeyFile = new PluginParameter<>(myKeyFile, value);
        return this;
    }


    /**
     * Input Database file if using SQLite. processData() opens it
     * with TagDataSQLite.
     *
     * @return Input GBS Database
     */
    public String inputGBSDatabase() {
        return myInputDB.value();
    }

    /**
     * Set Input GBS Database. Input Database file if using
     * SQLite. The parameter object is replaced by a copy that
     * carries the new value.
     *
     * @param value Input GBS Database
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin inputGBSDatabase(String value) {
        myInputDB = new PluginParameter<>(myInputDB, value);
        return this;
    }

    /**
     * Output (target) genotypes file. postProcessParameters() treats
     * a name ending in ".h5" as an HDF5 target; any other name is
     * treated as VCF output.
     *
     * @return Output Genotypes File
     */
    public String outputGenotypesFile() {
        return myOutputGenotypes.value();
    }

    /**
     * Set Output Genotypes File. Output (target)
     * genotypes file to add new genotypes to (new file created
     * if it doesn't exist). The parameter object is replaced by
     * a copy that carries the new value.
     *
     * @param value Output Genotypes File
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin outputGenotypesFile(String value) {
        myOutputGenotypes = new PluginParameter<>(myOutputGenotypes, value);
        return this;
    }

    /**
     * Average sequencing error rate per base (used to decide
     * between heterozygous and homozygous calls). postProcessParameters()
     * passes it to the BasicGenotypeMergeRule it creates.
     *
     * @return Ave Seq Error Rate
     */
    public Double aveSeqErrorRate() {
        return myAveSeqErrorRate.value();
    }

    /**
     * Set Ave Seq Error Rate. Average sequencing error rate
     * per base (used to decide between heterozygous and homozygous
     * calls). The parameter object is replaced by a copy that
     * carries the new value.
     *
     * @param value Ave Seq Error Rate
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin aveSeqErrorRate(Double value) {
        myAveSeqErrorRate = new PluginParameter<>(myAveSeqErrorRate, value);
        return this;
    }

    /**
     * Maximum divergence (edit distance) between new read
     * and previously mapped read (Default: 0 = perfect matches
     * only).
     *
     * NOTE(review): nothing else in this class reads this value.
     *
     * @return Max Divergence
     */
    public Integer maxDivergence() {
        return myMaxDivergence.value();
    }

    /**
     * Set Max Divergence. Maximum divergence (edit distance)
     * between new read and previously mapped read (Default:
     * 0 = perfect matches only). The parameter object is replaced
     * by a copy that carries the new value.
     *
     * @param value Max Divergence
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin maxDivergence(Integer value) {
        myMaxDivergence = new PluginParameter<>(myMaxDivergence, value);
        return this;
    }

    /**
     * Output depth: whether allele depths are written along with
     * the genotypes. processData() passes the value both to the
     * genotype calling step and to the VCF writer.
     *
     * @return Depth to Output - true or false
     */
    public Boolean depthToOutput() {
        return myDepthOutput.value();
    }

    /**
     * User sets true or false, indicating if they do
     * or do not want depth information written to the
     * output genotypes file. The parameter object is replaced
     * by a copy that carries the new value.
     *
     * @param value Write depth to output file
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin depthToOutput(Boolean value) {
        myDepthOutput = new PluginParameter<>(myDepthOutput, value);
        return this;
    }
    /**
     * Length of kmers to process. processFastQ() builds a tag from
     * this many leading bases of each read before looking it up
     * among the database tags.
     *
     * @return Kmer Length
     */
    public Integer kmerLength() {
        return myKmerLength.value();
    }

    /**
     * Set Kmer Length: number of leading bases of each read used
     * to build a tag. User should set this value equivalent to the
     * tag/kmer length that was used when creating the database.
     * If the two values are not equal inconsistent results
     * may occur. The parameter object is replaced by a copy that
     * carries the new value.
     *
     * @param value Kmer Length
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin kmerLength(Integer value) {
        myKmerLength = new PluginParameter<>(myKmerLength, value);
        return this;
    }
    /**
     * Minimum Position Quality Score. processData() passes it to the
     * tag database when requesting the SNP positions.
     *
     * @return Minimum position quality score
     */
    public Double positionQualityScore() {
        return posQualityScore.value();
    }

    /**
     * Set Minimum quality score for position:  This value is used to pull
     * SNPs out of the snpposition table.  Only snps with quality
     * scores meeting or exceeding the specified value will be
     * processed. The parameter object is replaced by a copy that
     * carries the new value.
     *
     * @param value Minimum position quality score
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin positionQualityScore(Double value) {
        posQualityScore = new PluginParameter<>(posQualityScore, value);
        return this;
    }
    
    /**
     * Batch size for processing fastq files: the number of sequence
     * files that processData() processes together in one parallel batch.
     *
     * @return batchSize
     */
    public Integer batchSize() {
        return myBatchSize.value();
    }
    /**
     * Set number of Fastq files processed simultaneously. The
     * parameter object is replaced by a copy that carries the
     * new value.
     *
     * @param value Batch size of fastq files
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin batchSize(Integer value) {
        myBatchSize = new PluginParameter<>(myBatchSize, value);
        return this;
    }
    /**
     * Minimum quality score within the read length to be accepted.
     * When greater than 0, processFastQ() skips reads whose first
     * low quality position lies within the kmer length.
     *
     * @return Minimum quality score
     */
    public Integer minimumQualityScore() {
        return myMinQualScore.value();
    }

    /**
     * Set Minimum quality score. Minimum quality score within
     * the barcode and read length to be accepted. The parameter
     * object is replaced by a copy that carries the new value.
     *
     * @param value Minimum quality score
     *
     * @return this plugin
     */
    public RGBSProductionSNPCallerPlugin minimumQualityScore(Integer value) {
        myMinQualScore = new PluginParameter<>(myMinQualScore, value);
        return this;
    }
    /**
     * Use STACKS likelihood method to call heterozygotes (default: use
     * tasselGBS likelihood ratio method)
     *
     * @return Use Stacks Likelihood
     */
    //public Boolean useStacksLikelihood() {
    //    return myStacksLikelihood.value();
    //}
    /**
     * Set Use Stacks Likelihood. Use STACKS likelihood method to call
     * heterozygotes (default: use tasselGBS likelihood ratio method)
     *
     * @param value Use Stacks Likelihood
     *
     * @return this plugin
     */
    //public ProductionSNPCallerPlugin useStacksLikelihood(Boolean value) {
    //    myStacksLikelihood = new PluginParameter<>(myStacksLikelihood, value);
    //    return this;
    //}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy