All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.data.AddReferenceAlleleToHDF5Plugin Maven / Gradle / Ivy

/*
 * AddReferenceAlleleToHDF5Plugin
 */
package net.maizegenetics.analysis.data;

import ch.systemsx.cisd.hdf5.HDF5Factory;
import ch.systemsx.cisd.hdf5.IHDF5Reader;
import ch.systemsx.cisd.hdf5.IHDF5Writer;
import net.maizegenetics.dna.WHICH_ALLELE;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;

import org.apache.log4j.Logger;

import javax.swing.*;

import java.awt.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;

import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GeneralPosition;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.dna.map.PositionList;
import net.maizegenetics.dna.map.PositionListBuilder;
import net.maizegenetics.dna.snp.GenotypeTableBuilder;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.HDF5Utils;

import java.util.List;

import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.util.Utils;

/**
 *
 * @author Jeff Glaubitz ([email protected])
 */
public class AddReferenceAlleleToHDF5Plugin extends AbstractPlugin {

    private static final Logger myLogger = Logger.getLogger(AddReferenceAlleleToHDF5Plugin.class);

    private PluginParameter myInputGenotypes = new PluginParameter.Builder<>("i", null, String.class)
        .guiName("Input HDF5 Genotype File")
        .required(true)
        .inFile()
        .description("Input HDF5 genotype (*.h5) file to be annotated with the reference allele")
        .build();
    private PluginParameter myRefGenome = new PluginParameter.Builder<>("ref", null, String.class)
        .guiName("Reference Genome File")
        .required(true)
        .inFile()
        .description("Reference genome file in fasta format")
        .build();
    private PluginParameter myRefGenomeVersion = new PluginParameter.Builder<>("ver", null, String.class)
        .guiName("Reference Genome Version")
        .required(true)
        .description("Version of the reference genome")
        .build();
    private PluginParameter myOutputGenotypes = new PluginParameter.Builder<>("o", null, String.class)
        .guiName("Output HDF5 Genotype File")
        .required(false)
        .outFile()
        .description("Output HDF5 genotype file annotated with the reference allele (Default: write to same folder as input, with '*.h5' replaced '*_withRef.h5')")
        .build();
    
    private BufferedReader refReader = null;
    private PositionListBuilder newPosListBuilder = null;
    private Chromosome currChr = null;
    private int currPos = Integer.MIN_VALUE;
    private String contextSeq;
    private final boolean writePositions = false;

    public AddReferenceAlleleToHDF5Plugin() {
        super(null, false);
    }

    public AddReferenceAlleleToHDF5Plugin(Frame parentFrame, boolean isInteractive) {
        super(parentFrame, isInteractive);
    }

    @Override
    public void postProcessParameters() {
        if (myOutputGenotypes.isEmpty()) {
            String outFile = (new File( inputHDF5GenotypeFile() )).getAbsolutePath().replaceFirst("\\.h5$", "_withRef.h5");
            outputHDF5GenotypeFile(outFile);
        }
        refReader = Utils.getBufferedReader(referenceGenomeFile());
    }


    @Override
    public DataSet processData(DataSet input) {
        String message = addRefAlleleToHDF5GenoTable();
        if(message != null) {
            myLogger.error(message);
            try {Thread.sleep(500);} catch(Exception e) {}
            throw new IllegalStateException(message);
        }
        try {refReader.close();} catch (IOException e) {}
        fireProgress(100);
        return null;
    }

    private String addRefAlleleToHDF5GenoTable() {
        String message = populatePositionsWithRefAllele();
        if (message != null) return message;
        PositionList newPos = newPosListBuilder.build();
        String genomeVer = newPos.hasReference() ? newPos.genomeVersion() : "unknown";
        myLogger.info("\nGenome version: "+genomeVer+"\n");
        GenotypeTableBuilder newGenos = GenotypeTableBuilder.getTaxaIncremental(newPos, outputHDF5GenotypeFile());
        IHDF5Reader h5Reader = HDF5Factory.open(inputHDF5GenotypeFile());
        List taxaNames = HDF5Utils.getAllTaxaNames(h5Reader);
        int nTaxaWritten = 0;
        for (String taxonName : taxaNames) {
            Taxon taxon = HDF5Utils.getTaxon(h5Reader, taxonName);
            byte[] genos = HDF5Utils.getHDF5GenotypesCalls(h5Reader, taxonName);
            byte[][] depth = HDF5Utils.getHDF5GenotypesDepth(h5Reader, taxonName);
            newGenos.addTaxon(taxon, genos, depth);
            ++nTaxaWritten;
            if(nTaxaWritten % 100 == 0) {
                myLogger.info("...finished writing genotypes and depth for "+nTaxaWritten+" taxa ");
            }
        }
        newGenos.build();
        myLogger.info("\n\nFinished adding reference alleles to file:");
        myLogger.info("  "+outputHDF5GenotypeFile()+"\n\n");
        return null;
    }
    
    private String populatePositionsWithRefAllele() {
        IHDF5Writer h5Writer = HDF5Factory.open(inputHDF5GenotypeFile());
        PositionList oldPosList = PositionListBuilder.getInstance(h5Writer);
//        h5Writer.close();
        newPosListBuilder = new PositionListBuilder();
        if (writePositions) {
            String genomeVer = oldPosList.hasReference() ? oldPosList.genomeVersion() : "unkown";
            myLogger.info("\ncurrent genome version: "+genomeVer+"\n");
            myLogger.info("SNPID\tchr\tpos\tstr\tmaj\tmin\tref\tmaf\tcov\tcontext");
        }
        for (Position oldPos : oldPosList) {
            Chromosome chr = oldPos.getChromosome();
            int pos = oldPos.getPosition();
            byte strand = oldPos.getStrand();
            byte refAllele = retrieveRefAllele(chr, pos, strand);
            if (refAllele == Byte.MIN_VALUE) {
                return "\nCould not find position "+pos+" on chromosome "+chr+" in the reference genome fasta file.\n\n\n";
            }
            Position newPos = new GeneralPosition.Builder(oldPos) 
                .allele(WHICH_ALLELE.Reference, refAllele)
                .build();
            if (writePositions) writePosition(newPos, contextSeq);
            newPosListBuilder.add(newPos);
        }
        newPosListBuilder.genomeVersion(referenceGenomeVersion());
        myLogger.info("Finished populating positions with RefAllele");
        return null;
    }
    
    private byte retrieveRefAllele(Chromosome chr, int pos, int strand) {
        findChrInRefGenomeFile(chr);
        char currChar = findPositionInRefGenomeFile(pos);
        if (currPos == pos) {
            byte refAllele = NucleotideAlignmentConstants.getNucleotideAlleleByte(currChar);
            if (strand == -1) refAllele = NucleotideAlignmentConstants.getNucleotideComplement(refAllele);
            return refAllele;
        } else {
            myLogger.warn("currPos:"+currPos);
            return Byte.MIN_VALUE;
        }
    }
    
    private void findChrInRefGenomeFile(Chromosome chr) {
        String temp = "Nothing has been read from the reference genome fasta file yet";
        try {
            while (refReader.ready() && (currChr == null || currChr.compareTo(chr) < 0)){
                temp = refReader.readLine().trim();
                if (temp.startsWith(">")) {
                    String chrS = temp.replace(">", "");
                    currChr = new Chromosome(chrS); // Chromosome class removes leading "chr" or "chromosome"
                    if (!writePositions) myLogger.info("\nCurrently reading chromosome "+currChr.getName()+" from reference genome fasta file\n\n");
                }
                currPos = 0;
            }
        } catch (IOException e) {
            myLogger.error("Exception caught while reading the reference genome fasta file:\n  "+e+"\nLast line read:\n  "+temp);
            try {Thread.sleep(500);} catch(InterruptedException iE) {}
            e.printStackTrace();
            throw new IllegalStateException("Problem reading reference genome file");
        }
        if (!currChr.equals(chr)) {
            myLogger.error("\nCould not find chromosome "+chr+" in the reference genome fasta file.\nMake sure that the chromosomes are in numerical order in that file\n\n\n");
            try {Thread.sleep(500);} catch(InterruptedException iE) {}
            throw new IllegalStateException("Problem reading reference genome file: Make sure that the chromosomes are in numerical order");
        }
    }
    
    private char findPositionInRefGenomeFile(int pos) {
        char currChar = Character.MAX_VALUE;
        contextSeq = "";
        try {
            while (currPos < pos) {
                int intChar = refReader.read();
                if (intChar == -1) {
                    currPos = Integer.MAX_VALUE;
                    return Character.MAX_VALUE;
                }
                currChar = (char) intChar;
                if (!Character.isWhitespace(currChar)) {
                    currPos++;
                    if (pos - currPos < 60) {
                        contextSeq += currChar;
                    }
                }
            }
        } catch (IOException e) {
            myLogger.error("\n\nError reading reference genome file:\n  "+e+"\n\n");
            throw new IllegalStateException("Problem reading reference genome file");
        }
        return currChar;
    }
    
    private void writePosition(Position pos, String contextSeq) {
        myLogger.info(
            pos.getSNPID()+
            "\t"+pos.getChromosome().getChromosomeNumber()+
            "\t"+pos.getPosition()+
            "\t"+pos.getStrand()+
            "\t"+pos.getAllele(WHICH_ALLELE.GlobalMajor)+
            "\t"+pos.getAllele(WHICH_ALLELE.GlobalMinor)+
            "\t"+pos.getAllele(WHICH_ALLELE.Reference)+
            "\t"+pos.getGlobalMAF()+
            "\t"+pos.getGlobalSiteCoverage()+
            "\t"+contextSeq
        );
    }

    @Override
    public ImageIcon getIcon() {
        return null;
    }

    @Override
    public String getButtonName() {
        return "Add reference allele";
    }

    @Override
    public String getToolTipText() {
        return "Add reference allele to HDF5 genotypes";
    }

    // The following getters and setters were auto-generated.
    // Please use this method to re-generate.
    //
    // public static void main(String[] args) {
    //     GeneratePluginCode.generate(AddReferenceAlleleToHDF5Plugin.class);
    // }

    /**
     * Input HDF5 genotype (*.h5) file to be annotated with
     * the reference allele
     *
     * @return Input HDF5 Genotype File
     */
    public String inputHDF5GenotypeFile() {
        return myInputGenotypes.value();
    }

    /**
     * Set Input HDF5 Genotype File. Input HDF5 genotype (*.h5)
     * file to be annotated with the reference allele
     *
     * @param value Input HDF5 Genotype File
     *
     * @return this plugin
     */
    public AddReferenceAlleleToHDF5Plugin inputHDF5GenotypeFile(String value) {
        myInputGenotypes = new PluginParameter<>(myInputGenotypes, value);
        return this;
    }

    /**
     * Reference genome file in fasta format
     *
     * @return Reference Genome File
     */
    public String referenceGenomeFile() {
        return myRefGenome.value();
    }

    /**
     * Set Reference Genome File. Reference genome file in
     * fasta format
     *
     * @param value Reference Genome File
     *
     * @return this plugin
     */
    public AddReferenceAlleleToHDF5Plugin referenceGenomeFile(String value) {
        myRefGenome = new PluginParameter<>(myRefGenome, value);
        return this;
    }

    /**
     * Version of the reference genome
     *
     * @return Reference Genome Version
     */
    public String referenceGenomeVersion() {
        return myRefGenomeVersion.value();
    }

    /**
     * Set Reference Genome Version. Version of the reference
     * genome
     *
     * @param value Reference Genome Version
     *
     * @return this plugin
     */
    public AddReferenceAlleleToHDF5Plugin referenceGenomeVersion(String value) {
        myRefGenomeVersion = new PluginParameter<>(myRefGenomeVersion, value);
        return this;
    }

    /**
     * Output HDF5 genotype file annotated with the reference
     * allele (Default: write to same folder as input, with
     * '*.h5' replaced '*_withRef.h5')
     *
     * @return Output HDF5 Genotype File
     */
    public String outputHDF5GenotypeFile() {
        return myOutputGenotypes.value();
    }

    /**
     * Set Output HDF5 Genotype File. Output HDF5 genotype
     * file annotated with the reference allele (Default:
     * write to same folder as input, with '*.h5' replaced
     * '*_withRef.h5')
     *
     * @param value Output HDF5 Genotype File
     *
     * @return this plugin
     */
    public AddReferenceAlleleToHDF5Plugin outputHDF5GenotypeFile(String value) {
        myOutputGenotypes = new PluginParameter<>(myOutputGenotypes, value);
        return this;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy