All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.pangenome.api.VariantUtils Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.api;

import com.google.common.collect.ImmutableMap;
import net.maizegenetics.pangenome.db_loading.DBLoadingUtils;
import net.maizegenetics.util.Tuple;
import org.apache.log4j.Logger;

import java.io.File;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Static helpers for locating gvcf files referenced by the PHG database.
 *
 * <p>The class keeps a process-wide cache that maps a gvcf id (the {@code id} column of the
 * {@code genome_file_data} table) to the path of that gvcf file on the local file system.
 * The cache is populated either directly via {@link #setGvcfRemoteToLocalFiles(Map)} or from
 * the database via {@link #setupGVCFRemoteToLocalFiles(Connection, String)}.
 */
public class VariantUtils {
    private static final Logger myLogger = Logger.getLogger(VariantUtils.class);

    // Cache of gvcfId -> local gvcf file path.
    // Every assignment in this class stores an immutable map, so the reference can be
    // handed out by getGvcfRemoteToLocalFiles() without a defensive copy.
    private static Map<Integer, String> gvcfRemoteToLocalFiles = Collections.emptyMap();

    /**
     * Returns the local gvcf file path cached for the given gvcfId.
     *
     * @param gvcfId the gvcf id; -1 is treated as "no gvcf"
     *
     * @return the cached path, or null when gvcfId is -1 or has no entry in the cache
     */
    public static String gvcfFilePath(int gvcfId) {
        if (gvcfId == -1) {
            return null;
        }
        return gvcfRemoteToLocalFiles.get(gvcfId);
    }

    /**
     * Replaces the cache with an immutable copy of the supplied map.
     *
     * @param gvcfRemoteToLocalFiles map of gvcfId to local gvcf file path
     */
    public static void setGvcfRemoteToLocalFiles(Map<Integer, String> gvcfRemoteToLocalFiles) {
        VariantUtils.gvcfRemoteToLocalFiles = ImmutableMap.copyOf(gvcfRemoteToLocalFiles);
    }

    /**
     * This method returns all the gvcf file entries from the genome_file_data table.
     * These are the entries whose type equals DBLoadingUtils.GenomeFileType.GVCF
     * (per the original note: type=2 are gvcfs, type=1 are assemblies).
     *
     * @param database - the database connection
     *
     * @return a map of gvcfId to "genome_path/genome_file" as stored in the db
     *
     * @throws IllegalStateException if the query fails; the underlying exception is attached as the cause
     */
    private static Map<Integer, String> gvcfIdsToGvcfFileMap(Connection database) {

        // The only concatenated value is a constant taken from the GenomeFileType enum,
        // not user input, so a plain Statement is sufficient here.
        String query = "SELECT id, genome_path, genome_file from genome_file_data where type=" + DBLoadingUtils.GenomeFileType.GVCF.getValue();
        ImmutableMap.Builder<Integer, String> result = new ImmutableMap.Builder<>();

        // Both the Statement and the ResultSet are declared as resources so that the
        // Statement is closed too (previously only the ResultSet was).
        try (Statement statement = database.createStatement();
             ResultSet rs = statement.executeQuery(query)) {

            while (rs.next()) {
                int gvcfId = rs.getInt("id");
                // genome_path is expected to be a semi-colon separated server and path,
                // e.g. myserver.com;/path/to/gvcfs (see splitGenomePath)
                String genomePath = rs.getString("genome_path");
                String file = rs.getString("genome_file");
                String serverFilePath = genomePath + "/" + file;

                result.put(gvcfId, serverFilePath);
            }
        } catch (SQLException se) {
            // This helps debug when queries have a problem.
            // Walk the chain with a separate cursor so that "se" is still available
            // to be attached as the cause below.
            int count = 1;
            for (SQLException cursor = se; cursor != null; cursor = cursor.getNextException()) {
                myLogger.error("SQLException " + count);
                myLogger.error("Code: " + cursor.getErrorCode());
                myLogger.error("SqlState: " + cursor.getSQLState());
                myLogger.error("Error Message: " + cursor.getMessage());
                count++;
            }
            throw new IllegalStateException("error querying genome_file_data ", se);
        } catch (Exception exc) {
            myLogger.error(exc.getMessage(), exc);
            throw new IllegalStateException("VariantUtils: gvcfIdsToGvcfFileMap: Error querying PHG db for genome_file_data:" + exc.getMessage(), exc);
        }

        return result.build();

    }

    /**
     * This method takes a String of the form {@code server;/path/to/file} and returns
     * a tuple with Tuple.x=server and Tuple.y=path. The split happens at the first
     * semi-colon; anything after it (including further semi-colons) is the path.
     *
     * @param genomePath the semi-colon separated server and path
     *
     * @return a Tuple of (server, path)
     *
     * @throws IllegalArgumentException if genomePath contains no semi-colon
     */
    private static Tuple<String, String> splitGenomePath(String genomePath) {
        int semiIndex = genomePath.indexOf(";");
        if (semiIndex < 0) {
            throw new IllegalArgumentException("genome path variable must be a semi-colon separated string, with the first portion indicating the server address, e.g. server;/path/to/file. Error on genomePath: " + genomePath);
        }
        String server = genomePath.substring(0, semiIndex);
        String path = genomePath.substring(semiIndex + 1);
        return new Tuple<>(server, path);
    }

    /**
     * Takes the remote gvcf file paths as stored in the db genome_file_data table,
     * and translates them to a local path based on the user provided localGVCFFolder value.
     * Only the file name of each remote path is kept; it is appended to localGVCFFolder
     * with a "/" separator. The resulting map replaces the cache.
     *
     * @param database - the database connection
     * @param localGVCFFolder - the local folder holding the gvcf files
     *
     * @throws IllegalArgumentException if database is null, if localGVCFFolder is null or empty,
     *                                  or if a stored genome path has no semi-colon
     * @throws IllegalStateException if querying the database fails
     */
    public static void setupGVCFRemoteToLocalFiles(Connection database, String localGVCFFolder) {

        if (database == null) {
            throw new IllegalArgumentException("VariantUtils: setupGVCFRemoteToLocalFiles: database cannot be null");
        }

        if (localGVCFFolder == null || localGVCFFolder.isEmpty()) {
            throw new IllegalArgumentException("VariantUtils: setupGVCFRemoteToLocalFiles: localGVCFFolder cannot be null or empty");
        }

        Map<Integer, String> gvcfIdToRemotePath = gvcfIdsToGvcfFileMap(database);

        Map<Integer, String> idToLocalFile = new HashMap<>();

        for (Map.Entry<Integer, String> entry : gvcfIdToRemotePath.entrySet()) {
            Tuple<String, String> serverPath = splitGenomePath(entry.getValue());
            // The server part is not needed here: keep just the file name from the
            // remote path and re-root it under the local folder.
            String justName = new File(serverPath.getY()).getName();
            String localPathName = localGVCFFolder + "/" + justName;
            idToLocalFile.put(entry.getKey(), localPathName);
        }

        gvcfRemoteToLocalFiles = ImmutableMap.copyOf(idToLocalFile);

    }

    /**
     * This method returns the gvcfRemoteToLocalFiles map (gvcfId to local gvcf file path).
     *
     * @return the current cache; the returned map is immutable
     */
    public static Map<Integer, String> getGvcfRemoteToLocalFiles() {
        return gvcfRemoteToLocalFiles;
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy