All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.dna.map.GenomeSequence Maven / Gradle / Ivy

/**
 * Interface for Genome Sequence
 * 
 */
package net.maizegenetics.dna.map;

import java.util.ArrayList;
import java.util.Map;
import java.util.Set;

import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.util.Tuple;


/**
 * Defines the genome sequence of a chromosome
 * 
 * @author Lynn Johnson
 *
 */
public interface GenomeSequence {

    /**
     * Returns a list of chromosomes whose sequences have been
     * stored in the chromsomeSequence map of the class implementing
     * this interface.  Return empty set if empty.
     * 
     * @return  a Set of Chromosome objects
     */
    public Set chromosomes();

    /**
     * Takes a Chromosome object and returns the stored byte array representing
     * the genomic sequence for the specified chromosome.
     * 
     * @param chrom; a Chromosome object representing the chromosome whose
     * 				sequence will be returned
     * @return  A byte array containing the chromosome alleles in NucleotideAlignmentConstant
     * 			form packed in half bytes
     */
    public byte[] chromosomeSequence(Chromosome chrom);

    /**
     * Returns the partial genomic sequence for a  chromosome, from the specified start
     * position to the specified end position.  THe start/end positions are inclusive and
     * the request is 1-based (though the alleles are stored in a 0-based byte array).
     * 
     * @param chrom:  the chromosome whose partial sequence will be returned.
     * @param startSite:  the 1-based position in the sequence to start the pull.
     * @param endSite:  the 1-based position in the sequence that will be the last allele in the pull
     * @return A byte array of alleles in NucleotideAlignmentConstant form that is packed into
     * 			half bytes.
     */
    public byte[] chromosomeSequence(Chromosome chrom, int startSite, int endSite);

    /**
     * Returns the partial genomic sequence from the specified start
     * position to the specified end position.  THe start/end positions are inclusive and
     * the request is 0-based (though the alleles are stored in a 0-based byte array).  Note the difference with
     * chromosomes, which start with 1.  Can only return 2.1 billion sites per call.
     *
     * @param startSite:  the 0-based position in the sequence to start the pull.
     * @param lastSite:  the 0-based position in the sequence that will be the last allele in the pull
     * @return A byte array of alleles in NucleotideAlignmentConstant
     */
    public byte[] genomeSequence(long startSite, long lastSite);

    /**
     * Returns the partial genomic sequence from the specified start
     * position to the specified end position.  The start/end positions are inclusive and
     * the request is 0-based (though the alleles are stored in a 0-based byte array).  Note the difference with
     * chromosomes, which start with 1.  Can only return 2.1 billion sites per call.
     *
     * @param startSite:  the 0-based position in the sequence to start the pull.
     * @param lastSite:  the 0-based position in the sequence that will be the last allele in the pull
     * @return A String of the sequence
     */
    default String genomeSequenceAsString(long startSite, long lastSite){
        return NucleotideAlignmentConstants.nucleotideBytetoString(genomeSequence(startSite,lastSite));
    }
    
    /**
     * Takes a list of coordinates from the full genome sequence and for each returns
     * the corresponding chromosome and a coordinate relative to the start of
     * that chromosome.  The request is 0-based, as are the arrays where the
     * alleles are stored, and the results. 
     * 
     * @param coordinates:  list of coordinates from the full genome sequence to be like mapped
     * @return A map of > for each global coordinate
     *          passed where "Long" is the global ref, and Tuple<> is the chrom/chrom-position
     *          to which this relates.
     */    
    public Map> fullRefCoordinateToChromCoordinate(ArrayList coordinates);

    /**
     * Returns the length of the current chromsome
     * @param chromosome
     * @return
     */
    public int chromosomeSize(Chromosome chromosome);

    /**
     * Returns the length of the entire genome
     */
    public long genomeSize();

    /**
     * Returns the number of chromosomes
     */
    public int numberOfChromosomes();

    /**
     * Returns the allele value in a byte for the specified PHYSICAL position (1-based)
     * @param chrom  Chromosome object we wish to query
     * @param position Position on the chromosome whose value will be returned
     * @return
     */
    byte genotype(Chromosome chrom, int position);

    /**
     * Returns the TASSEL encoding for the allele value in a byte for the specified PHYSICAL position
     * @param chrom
     * @param positionObject  Position object from which the physical will be obtained.
     * @return
     */
    byte genotype(Chromosome chrom, Position positionObject);

    
    /**
     * Returns the haplotype allele value in a String for the specified physical position on the specified chromosome
     * @param chrom
     * @param position
     * @return
     */
    String genotypeAsString(Chromosome chrom, int position);

    /**
     * Returns the haplotype allele value in a String for the specified physical position on the specified chromosome
     * @param chrom
     * @param positionObject
     * @return
     */
    String genotypeAsString(Chromosome chrom, Position positionObject);

    /**
     * Returns a string of haplotype allele values for the specified physical start and
     * physical end positions on the specified chromosome.
     * @param chrom
     * @param startSite
     * @param endSite
     * @return
     */
    String genotypeAsString(Chromosome chrom, int startSite, int endSite);



}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy