All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.pangenome.db_loading.PHGData Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.db_loading;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import kotlin.Pair;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.db_loading.DBLoadingUtils.AnchorType;
import net.maizegenetics.util.Tuple;

/**
 * @author lcj34
 *
 */
public interface PHGData {

    /**
     * Return genoid
     * @param line_name
     * @return
     */
    int getGenoidFromLine(String line_name);
    
    /**
     * Returns hapid for given line name and hap_number
     * @param genoid
     * @param hap_number
     * @return
     */
    int getHapidForGenoidHapNumber(int genoid, int hap_number);
    
    /**
     * Returns list of hapids for a given line name
     * @param genoid
     * @return
     */
    List getHapidsForGenoid(int genoid);

    /**
     * Method to retrieve both a line name and the hap number given a hapid.
     * @param hapid
     * @return  Tuple consisting of Tuple
     */
    Tuple getLineNameHapNumberFromHapid(int hapid);

    /**
     * Retrieves line name and hapnumber, and returns them with the hapid.
     * hapid is the key in the map.  THe line name and hap number are concatenated
     * with an underscore and returned as the string value for each hapid.
     * @param lineNames
     * @return
     */
    Map getHapidHapNumberLineNamesForLines(List lineNames);
   
    /**
     * Method grabs a list of distinct chromosome names for a genome_interval version
     * 
     * @param line_name
     * @param hap_number
     * @param version  Version name for anchors
     * @return
     */
    List getChromNamesForHaplotype(String line_name, int hap_number, String version);

    /**
     * Returns method_id given a method name. or 0 if no id found for the given name
     * @param method_name
     * @return
     */
    int getMethodIdFromName(String method_name);

    /**
     * Returns a map of  created from the genotypes
     * and haplotypes table.  
     * @return
     */
    Map getHapidMapFromLinenameHapNumber();

    /**
     * Takes a list of taxa and returns the corresponding gamete_group_id or 0
     * @param gametes Items on the list must be of the form taxon_hapNumber
     * @return
     */
    int getGameteGroupIDFromTaxaList(List gametes);

    /**
     * Returns reference range id
     * @param refData needs to be of form chr1:startpos:endPos
     *
     * @return
     */
    int getRefRangeIDFromString(String refData);

    /**
     * Returns the haplotype_id from the haplotypes table based on the gamete_grp_id
     * (calculated from the taxa list), the ref_range_id (calculated from the 
     * ref coordinates part of the idline) and the method.
     * 
     * @param idLine expected form:  refchr:refStartPos:refEndPos;taxa_hapnumber:taxa_hapnumber:etc
     * @param methodName This is the method used for creating the haplotypes
     * @return
     */
    int getHaplotypeIDFromFastaIDLine(String idLine, String methodName);

    /**
     * Returns sorted treeRangeMap of Positions for specified chromosome
     * @param chrom
     * @return
     */
    RangeMap getIntervalRangesWithIDForChrom( String chrom);
    

    /**
     * THis method pulls the haplotype_counts_id and corresponding path for all entries
     * in the paths table with the specified method.
     * @param method
     * @return Map of Haplotype_counts_id/paths
     */
    Map getHapCountsIDAndPathsForMethod(String method);
    
    /**
     * This method pulls the haplotype_counts_id with corresponding genotypes line name and data for all DB entries  
     * based on the supplied method
     * @param method
     * @return Map of  from haplotype_counts table
     */
    Map> getHapCountsIDAndDataForVersionMethod(String method);

    /**
     * Method uses line_name, method_name and file_Group_name to fetch a read_mapping_id.
     * -1 returned if the this combination is not present in the db.
     * @param line_name
     * @param method_name
     * @param file_group_name
     * @return  read_mapping_id or -1
     */
    int getReadMappingId(String line_name, String method_name, String file_group_name);

    /**
     * Method to get the Read Mapping data from the DB using the read_mapping_id.
     * Generally this is returned from PHGData.getReadMappingId(String line_name, String method_name, String file_group_name)
     * @param readMappingId
     * @return byte array of the read mapping data.
     */
    Pair getReadMappingsForId(int readMappingId);

    /**
     * This method takes a list of taxa and a method name.
     * It returns a list of read_mapping Ids based on these 2 parameters
     * @return List of  read_mapping_ids.
     */
    List getReadMappingIdsForTaxaMethod(List taxa,String method);

    /**
     * Method to get the mapping ids by method
     * @param methodName
     * @return
     */
    List getReadMappingsForMethod(String methodName);


    /**
     * Returns path_ids from the read_mapping_paths table associated
     * with the list of read_mapping_ids provided.
     * @param readMappingIds
     * @return
     */
    List getPathIdsForReadMappingIds(List readMappingIds);
    /**
     * Method to get a list of all genoid line_names that have paths for
     * method = method_name.
     * @param method_name
     * @return
     */
    Set getTaxaForPathMethod(String method_name);

    /**
     * Returns a map of taxon to paths for that taxon from the paths table
     * for a specified method name.
     *
     * @param method_name
     * @return
     */
    Map getTaxonPathsForMethod(String method_name);

    /**
     * Method to check to see if a given taxon and fileGroup are already in the readMapping table of the DB.
     * @param taxon
     * @param fileGroupName
     * @param methodName
     * @return
     */
    boolean isFileGroupNew(String taxon, String fileGroupName, String methodName);

    /**
     * Method queries the db methods table for the description field given a methods name
     * @param method_name:  name of method
     * @return Map of plugin parameter to value as stored for the DB.  Null if the method does not exist
     */
    Map getMethodDescriptionFromName(String method_name);

    /**
     * Method to return all reference ranges associated with a specified method name
     * @param methodName name of method group for which the user wants reference range ids
     * @return A list of reference_range_ids associated with the speciried mathod name.
     */
    List getRefRangesForMethod(String methodName);


    /**
     * Method takes a list of taxa and a method name and returns
     * the paths (if populated in the paths table) stored for
     * each taxon/method pair.
     *
     * For each taxon, a List> is returned, which
     * provides a haplotypes_id list for taxon's chromosome.  There will be
     * only 1 list if it is a haploid, 2 for a diploid, etc
     * @param taxon
     * @param method_name
     * @return
     */
    Map>> getPathsForTaxonMethod(List taxon, String method_name);

    /**
     * Gets a list of the taxa currently in the database.
     * Taxa are identified by the line_name field of the genotypes table
     * @return List of taxon names
     */
    List getDbTaxaNames();

    /**
     * Get the taxa that make up  the specified taxa group
     * @param group_name
     * @return List of taxon that comprise the group
     */
    List getTaxaForTaxaGroup(String group_name);

    /**
     * Returns taxa_grp_id given a taxa group name. or 0 if no id found for the given name
     * @param group_name
     * @return
     */
    int getTaxaGroupIDFromName(String group_name);

    /**
     * Returns a list of all taxa group names from the taxa_groups table
     * @return
     */
    List getAllTaxaGroupNames();

    /**
     * Given an MD5 hash value, returns the db id for that entry, or 0 if none exist
     * @param hash
     * @return
     */
    int getHaplotypeListIDfromHash(String hash);

    /**
     * Given a haplotype_list_id, query the haplotype_list table and return
     * a list of haplotypes stored for that ID, or an empty list if there were none.
     * @param id - haplotype_list_id which is hopefully present in the table
     * @return  a List of the haplotypes associated with that table,
     */
    List getHaplotypeList(int id);

    /**
     * Gets the id from the genome_file_data table based on
     * the genoid .
     * @param genoid
     * @return returns fileid or -1 if no information for this genoid
     */
    int getGenomeFileIdFromGenoid(int genoid);

    /**
     * Gets the id from the genome_file_data table based on the
     * genoid and a filename
     * @param genoid
     * @param file
     * @return
     */
    int getGenomeFileIdFromGenoidAndFile(int genoid, String file);

    /**
     * Given a genoid and a file name, return the hash value and id
     * for that entry in the genome_file_data table.  If there is no entry, return null
     * @param genoid
     * @param file
     * @return
     */
    Pair getGenomeFileHashFromGenoidandFile(int genoid, String file);
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy