// net.maizegenetics.pangenome.db_loading.PHGDataWriter Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of phg Show documentation
// PHG - Practical Haplotype Graph
// There is a newer version: 1.10
/**
 * 
 */
package net.maizegenetics.pangenome.db_loading;

import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.Multimap;

import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.util.Tuple;

/**
 * Write-side contract for storing PHG data to the database tables named in the
 * method comments below.  Extends {@link PHGData} with the {@code put*} methods.
 *
 * NOTE(review): several parameter types in this file appear to have lost their
 * generic type arguments (raw {@code List}, {@code Map}, {@code Set}, {@code Multimap},
 * and fragments such as {@code Multimap>>}).  Those declarations are left exactly
 * as found; restore the type arguments from the authoritative project source.
 *
 * @author lcj34
 *
 */
public interface PHGDataWriter extends PHGData {
    /**
     * Stores chrom, start pos, end pos to the reference_ranges table.
     * isFocus identifies focus intervals from the user's bed file.
     * NOTE(review): isFocus is not a parameter of this method; presumably it is
     * carried by the entries of anchorData - confirm.
     * 
     * @param anchorData  list of anchor entries to store (element type is not visible here)
     * @param refGroupMethodID  method_id used for creating this ref_range_group
     * @return boolean status; presumably true on success - confirm against the implementation
     */            
    boolean putAllAnchors(List anchorData,   int refGroupMethodID);
    
    /**
     * Stores required data to the genotypes and haplotypes tables
     * for each entry on the list.
     * NOTE(review): the parameter is a single GenoHaploData object, not a list;
     * the "list" wording above may be stale - confirm.
     * 
     * @param ghData  the genotype/haplotype data to store
     * @return boolean status; presumably true on success - confirm against the implementation
     */
    boolean putGenoAndHaploTypeData(GenoHaploData ghData);
    
    /**
     * Fills in the haplotypes table
     * for the reference ranges.
     * 
     * @param line_name  name of the line (exact semantics not documented here)
     * @param hapnumber  haplotype number (exact semantics not documented here)
     * @param adata  Anchor data, including chrom, start/end positions
     * @param hapMethod  method used to create anchors.  Declared as int; this comment
     *                   previously described it as a method name that maps to the method
     *                   table for its id, so presumably this is that id - confirm.
     * @param refGroupMethod  Set of methods used to create the ref_range_group
     * @param gvcf String - name of gvcf file
     * @param variant_list String - name of file containing list of variants
     * @return boolean status; presumably true on success - confirm against the implementation
     */
    boolean putRefAnchorData(String line_name, int hapnumber, List adata,int hapMethod, 
             Set refGroupMethod,String gvcf, String variant_list);

    /**
     * Adds a method, its type and its description to the anchor_methods table.
     * These are used to identify how sequences were created, how they were combined into
     * consensus sequences, how haplotype counts were scored, how paths through the graph
     * were created, or how an edge was created.
     * 
     * The "type" field identifies the table to which the method belongs.
     * 
     * @param name  name of the method
     * @param type  method type; identifies the table to which the method belongs
     * @param description - a map of pluginParameter name to value(as String)
     * @return the methodId from the methods table
     */
    int putMethod(String name, DBLoadingUtils.MethodType type, Map description);

    /**
     * Adds inter-anchor sequences for the specified assembly to the anchor_sequences
     * and anchor_haplotypes table.
     * 
     * This method takes a multi-map as assembly.  Inter-anchors that do not map
     * to a reference inter-anchor are all given the anchorid 0.
     * 
     * @param line_name  name of the assembly line (exact semantics not documented here)
     * @param hapNumber  haplotype number (exact semantics not documented here)
     * @param method  method identifier, passed as a String
     * @param anchorSequences  multi-map of the inter-anchor sequences to store
     *                         (key/value types are not visible here)
     * @return boolean status; presumably true on success - confirm against the implementation
     */
    boolean putAssemblyInterAnchorSequences(String line_name, int hapNumber, String method, Multimap anchorSequences);
    
    /**
     * This method takes a map of consensus data, finds the anchorIds based on Position,
     * finds the hapids of the taxa whose sequences at the specified anchorID map
     * to the consensus.  Adds the gamete_group and sequence data to the haplotypes table;
     * adds entries to gamete_groups and gamete_haplotypes.
     * 
     * Declared void: nothing is returned.  (This comment previously carried an
     * {@code @return} tag describing a list of hash codes loaded to the anchor_sequences
     * table, which does not match the declaration.)
     * 
     * @param consensusMap  Multimap of consensus data; its type arguments are garbled in
     *                      this copy of the file - TODO restore from the project source
     * @param methodId  method used for collapsing anchors
     */
    void putConsensusSequences(Multimap>> consensusMap, 
            int methodId);

    /**
     * Takes a list of gametes and stores to the gamete_groups and gamete_haplotypes table.
     * Skips if this grouping already exists.
     * @param gametes list consisting of taxa/gamete number in the form taxaName_gameteNumber
     * @return true if successful
     */
    boolean putGameteGroupAndHaplotypes(List gametes);

    /**
     * Stores gamete sequence data to the haplotypes table.
     * This method associates all entries with the single gamete_grp_id which is passed in.
     * It is used when loading reference_ranges sequences or haplotype sequences for a 
     * single line.
     * 
     * The gidToVariantDataMap map is used to create the variant list blob for the db.
     * 
     * Declared void: nothing is returned.
     * 
     * @param gamete_grp_id  the single gamete group id all entries are associated with
     * @param method  method id (int) for these sequences - presumably an id in the
     *                methods table; confirm
     * @param anchorSequences  map of the sequences to store (key/value types are not
     *                         visible here)
     * @param gidToVariantDataMap :  a map of ReferenceRangeID to Tuple; the type arguments
     *                            are garbled in this copy of the file - TODO restore
     */
    void putHaplotypesForGamete(int gamete_grp_id, int method,  Map anchorSequences,
            Map>> gidToVariantDataMap);

    /**
     * Add data to the haplotypes table.  Entries on the map are for different gamete groups.
     * The key is a Position item identifying the genome_interval id.
     * The value is a Tuple consisting of (x) AnchorDataPHG object with sequence, gvcf, etc; and
     * (y) a List of taxa represented by the AnchorDataPHG sequence.
     * 
     * @param mapWithGroupHash  multi-map described above; its type arguments are garbled
     *                          in this copy of the file - TODO restore from the project source
     * @param method_id  Id in the methods table for this group of sequences
     */
    void putHaplotypesForMultipleGroups(Multimap> mapWithGroupHash, 
            int method_id);

    /**
     * This method adds data to the haplotype_counts table.
     * The "data" is a Snappy compressed byte buffer of a 3xn array, found in parameter "counts"
     * 
     * To see how this data is stored, examine DBLoadingUtils.encodeHapCountsArrayFromFile(),
     * DBLoadingUtils.encodeHapCountsArrayFromMultiset() and DBLoadingUtils.decodeHapCountsArray()
     * 
     * @param method  method identifier, passed as a String (presumably the method name - confirm)
     * @param methodDetails  map of details for the method (key/value types are not visible here)
     * @param taxonName  name of the taxon the counts belong to (presumed from the name - confirm)
     * @param fastqFile  fastq file associated with the counts (presumed from the name - confirm)
     * @param counts  Snappy compressed byte buffer of a 3xn array, as described above
     */
    void putHaplotypeCountsData(String method, Map methodDetails, String taxonName, String fastqFile, byte[] counts);

    /**
     * This method stores paths data to the paths table.
     * @param method - Method Name for Path determination process
     * @param methodDetails Details of how these paths were created
     * @param taxon Name of line for which data is being added
     * @param readMappingIds List of read_mapping_ids
     * @param pathBytes  Compressed byte array of paths data
     * @return an int whose meaning is not documented here; presumably the id of the
     *         stored paths entry - confirm against the implementation
     */
    int putPathsData(String method, Map methodDetails, String taxon, List readMappingIds, byte[] pathBytes);

    /**
     * Takes a method id and a list of reference ranges.
     * Populates the ref_range_ref_range_method table.
     * @param group_method_id  the method id
     * @param refRangeList  the list of reference ranges (element type is not visible here)
     */
    void putRefRangeRefRangeMethod(int group_method_id, List refRangeList);

    /**
     * Takes a gamete_grp_id, method_id, a collection of haplotype sequences and a chromosome.
     * Starts the process of storing table data for the haplotypes to the db.
     * This will set maxEntries to 10000 and will call
     *
     * putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome, int maxEntries)
     * 
     * NOTE(review): the haplotype sequences are passed as a Map, although the original
     * wording called them a list.
     * 
     * @param gamete_grp_id  gamete group id
     * @param method  method id
     * @param anchorSequences  the haplotype sequences (key/value types are not visible here)
     * @param chromosome  the chromosome
     */
    void putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome);

    /**
     * Takes a gamete_grp_id, method_id, a collection of haplotype sequences, a chromosome
     * and a number of MaxEntries.
     * Starts the process of storing table data for the haplotypes to the db.
     * @param gamete_grp_id  gamete group id
     * @param method  method id
     * @param anchorSequences  the haplotype sequences (key/value types are not visible here)
     * @param chromosome  the chromosome
     * @param maxEntries  number of max entries; how the limit is applied is not
     *                    documented here - presumably a batch size, confirm
     */
    void putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome, int maxEntries);


    /**
     * Adds alleles to the allele table.
     * @param alleles  the set of alleles to add (element type is not visible here)
     * @return boolean status; presumably true on success - confirm against the implementation
     */
    boolean putAlleleData(Set alleles);

    /**
     * Takes a method name, method details, taxon name (should exist in the genotypes table),
     * file_group_name, and a byte array of read mapping data.  This is stored to the PHG
     * read_mapping table.
     * NOTE(review): methodDetails is declared as a Map, although the original wording
     * called it a "method details string".
     * @param method  method name
     * @param methodDetails  method details
     * @param taxon  taxon name; should exist in the genotypes table
     * @param file_group_name  file group name
     * @param mapping_data  byte array of read mapping data
     * @return an int whose meaning is not documented here; presumably the id of the
     *         stored read_mapping entry - confirm against the implementation
     */
    int putReadMappingData(String method, Map methodDetails, String taxon, String file_group_name, byte[] mapping_data);

    /**
     * This prompts a call to private method loadReadMappingHash() to update this hash table.
     * NOTE(review): loadReadMappingHash() is not declared in this interface; presumably it
     * lives in the implementing class - confirm.
     */
    void updateReadMappingHash();
}