net.maizegenetics.pangenome.db_loading.PHGDataWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
/**
*
*/
package net.maizegenetics.pangenome.db_loading;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.common.collect.Multimap;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.util.Tuple;
/**
* Write-side contract for loading PHG data into the database. Each method below
* stores data into the table(s) named in its own documentation; the interface
* extends {@link PHGData}.
*
* NOTE(review): the signatures in this copy use raw collection types
* (List, Map, Set, Multimap) and three of them contain stray closing angle
* brackets (putConsensusSequences, putHaplotypesForGamete,
* putHaplotypesForMultipleGroups). This looks like generic type arguments were
* lost when the file was extracted - restore them from the upstream source
* before compiling.
*
* @author lcj34
*/
public interface PHGDataWriter extends PHGData {
/**
* Stores chrom, start pos and end pos to the reference_ranges table.
* isFocus identifies focus intervals from the user's bed file (isFocus is not a
* parameter of this method; presumably it is carried on each anchorData
* entry - TODO confirm).
*
* @param anchorData list of anchor entries to store (element type not visible in this copy)
* @param refGroupMethodID method_id used for creating this ref_range_group
* @return presumably true when the data was stored successfully - TODO confirm against the implementation
*/
boolean putAllAnchors(List anchorData, int refGroupMethodID);
/**
* Stores required data to the genotypes and haplotypes tables.
*
* NOTE(review): the earlier wording said "for each entry on the list", but the
* parameter is a single GenoHaploData object - confirm whether it wraps
* several entries.
*
* @param ghData the genotype/haplotype data to store
* @return presumably true when the data was stored successfully - TODO confirm against the implementation
*/
boolean putGenoAndHaploTypeData(GenoHaploData ghData);
/**
* Fills in the haplotypes table for the reference ranges.
*
* @param line_name name of the line the data belongs to
* @param hapnumber haplotype number for the line
* @param adata Anchor data, including chrom, start/end positions
* @param hapMethod method used to create anchors; maps to the method table.
*        NOTE(review): previously documented as a method name, but the
*        parameter is an int - presumably the method id; confirm.
* @param refGroupMethod Set of methods used to create the ref_range_group
* @param gvcf String - name of gvcf file
* @param variant_list String - name of file containing list of variants
* @return presumably true when the data was stored successfully - TODO confirm against the implementation
*/
boolean putRefAnchorData(String line_name, int hapnumber, List adata,int hapMethod,
Set refGroupMethod,String gvcf, String variant_list);
/**
* Adds a method, its type and its description to the anchor_methods table.
* These are used to identify how sequences were created, how they were combined
* into consensus sequences, how haplotype counts were scored, how paths through
* the graph were created or how an edge was created.
*
* The "type" field identifies the table to which the method belongs.
*
* NOTE(review): this description names the "anchor_methods" table while the
* return documentation names the "methods" table - confirm which is current.
*
* @param name name of the method
* @param type type of the method; identifies the table to which the method belongs
* @param description a map of pluginParameter name to value (as String)
* @return the methodId from the methods table
*/
int putMethod(String name, DBLoadingUtils.MethodType type, Map description);
/**
* Adds inter-anchor sequences for the specified assembly to the anchor_sequences
* and anchor_haplotypes tables.
*
* This method takes a multi-map as assembly. Inter-anchors that do not map
* to a reference inter-anchor are all given the anchorid 0.
*
* @param line_name name of the assembly line
* @param hapNumber haplotype number for the line
* @param method name of the method associated with these sequences
* @param anchorSequences multi-map of the inter-anchor sequences (key/value types not visible in this copy)
* @return presumably true when the data was stored successfully - TODO confirm against the implementation
*/
boolean putAssemblyInterAnchorSequences(String line_name, int hapNumber, String method, Multimap anchorSequences);
/**
* Takes a map of consensus data, finds the anchorIds based on Position, and
* finds the hapids of the taxa whose sequences at the specified anchorID map
* to the consensus. Adds the gamete_group and sequence data to the haplotypes
* table; adds entries to gamete_groups and gamete_haplotypes.
*
* NOTE(review): earlier documentation described a returned list of hash codes
* loaded to the anchor_sequences table, but the method is declared void.
* NOTE(review): the generic type arguments of consensusMap are missing from
* this copy (only stray closing brackets remain) - restore from upstream.
*
* @param consensusMap Multimap of consensus data, keyed so that anchorIds can be found by Position
* @param methodId method used for collapsing anchors
*/
void putConsensusSequences(Multimap>> consensusMap,
int methodId);
/**
* Takes a list of gametes and stores them to the gamete_groups and
* gamete_haplotypes tables. Skips if this grouping already exists.
*
* @param gametes list consisting of taxa/gamete number in the form taxaName_gameteNumber
* @return true if successful
*/
boolean putGameteGroupAndHaplotypes(List gametes);
/**
* Stores gamete sequence data to the haplotypes table.
* This method associates all entries with the single gamete_grp_id which is passed in.
* It is used when loading reference_ranges sequences or haplotype sequences for a
* single line.
*
* The gidToVariantDataMap map is used to create the variant list blob for the db.
*
* NOTE(review): the generic type arguments of gidToVariantDataMap are missing
* from this copy (only stray closing brackets remain) - restore from upstream.
*
* @param gamete_grp_id the single gamete group id all entries are associated with
* @param method id of the method associated with these sequences (presumably a methods-table id - confirm)
* @param anchorSequences map of the sequences to store (key/value types not visible in this copy)
* @param gidToVariantDataMap a map of ReferenceRangeID to Tuple
*/
void putHaplotypesForGamete(int gamete_grp_id, int method, Map anchorSequences,
Map>> gidToVariantDataMap);
/**
* Adds data to the haplotypes table. Entries on the map are for different gamete groups.
* The key is a Position item identifying the genome_interval id.
* The value is a Tuple consisting of (x) an AnchorDataPHG object with sequence, gvcf, etc; and
* (y) a List of taxa represented by the AnchorDataPHG sequence.
*
* NOTE(review): the generic type arguments of mapWithGroupHash are missing from
* this copy (a stray closing bracket remains) - restore from upstream and check
* them against the key/value description above.
*
* @param mapWithGroupHash multimap of Position to Tuple, as described above
* @param method_id Id in the methods table for this group of sequences
*/
void putHaplotypesForMultipleGroups(Multimap> mapWithGroupHash,
int method_id);
/**
* Adds data to the haplotype_counts table.
* The "data" is a Snappy compressed byte buffer of a 3xn array, found in parameter "counts".
*
* To see how this data is stored, examine DBLoadingUtils.encodeHapCountsArrayFromFile(),
* DBLoadingUtils.encodeHapCountsArrayFromMultiset() and DBLoadingUtils.decodeHapCountsArray().
*
* @param method name of the method associated with these counts
* @param methodDetails details of the method (map contents not visible in this copy)
* @param taxonName name of the taxon the counts belong to
* @param fastqFile name of the fastq file associated with the counts
* @param counts Snappy compressed byte buffer of the 3xn counts array
*/
void putHaplotypeCountsData(String method, Map methodDetails, String taxonName, String fastqFile, byte[] counts);
/**
* Stores paths data to the paths table.
*
* @param method Method name for the path determination process
* @param methodDetails Details of how these paths were created
* @param taxon Name of line for which data is being added
* @param readMappingIds List of read_mapping_ids
* @param pathBytes Compressed byte array of paths data
* @return an int whose meaning is not documented here; presumably the id of the
*         stored paths entry - TODO confirm against the implementation
*/
int putPathsData(String method, Map methodDetails, String taxon, List readMappingIds, byte[] pathBytes);
/**
* Takes a method id and a list of reference ranges.
* Populates the ref_range_ref_range_method table.
*
* @param group_method_id the method id to associate with the reference ranges
* @param refRangeList the list of reference ranges (element type not visible in this copy)
*/
void putRefRangeRefRangeMethod(int group_method_id, List refRangeList);
/**
* Takes a gamete_grp_id, method_id, list of haplotype sequences and a chromosome.
* Starts the process of storing table data for the haplotypes to the db.
* This will set maxEntries to 10000 and will call
*
* putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome, int maxEntries)
*
* NOTE(review): the 10000 default is a documented expectation on implementations;
* nothing in this interface enforces it.
*
* @param gamete_grp_id gamete group id for the haplotypes
* @param method method id for the haplotypes
* @param anchorSequences the haplotype sequences (declared as a Map; key/value types not visible in this copy)
* @param chromosome the chromosome the sequences belong to
*/
void putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome);
/**
* Takes a gamete_grp_id, method_id, list of haplotype sequences, a chromosome and a number of maxEntries.
* Starts the process of storing table data for the haplotypes to the db.
*
* @param gamete_grp_id gamete group id for the haplotypes
* @param method method id for the haplotypes
* @param anchorSequences the haplotype sequences (declared as a Map; key/value types not visible in this copy)
* @param chromosome the chromosome the sequences belong to
* @param maxEntries maximum number of entries (presumably per batch written to the db - TODO confirm)
*/
void putHaplotypesData(int gamete_grp_id, int method, Map anchorSequences, String chromosome, int maxEntries);
/**
* Adds alleles to the allele table.
*
* @param alleles the set of alleles to add (element type not visible in this copy)
* @return presumably true when the data was stored successfully - TODO confirm against the implementation
*/
boolean putAlleleData(Set alleles);
/**
* Takes a method name, method details, taxon name (should exist in the genotypes table), file_group_name,
* and a byte array of read mapping data. This is stored to the PHG read_mapping table.
*
* NOTE(review): earlier wording called methodDetails a "string", but the
* parameter is declared as a Map.
*
* @param method name of the method
* @param methodDetails details of the method
* @param taxon taxon name; should exist in the genotypes table
* @param file_group_name name of the file group
* @param mapping_data byte array of read mapping data
* @return an int whose meaning is not documented here; presumably the id of the
*         stored read_mapping entry - TODO confirm against the implementation
*/
int putReadMappingData(String method, Map methodDetails, String taxon, String file_group_name, byte[] mapping_data);
/**
* Prompts a call to the private method loadReadMappingHash() to update the
* read mapping hash table. (That private method belongs to the implementation
* and is not visible in this interface.)
*/
void updateReadMappingHash();
}