net.maizegenetics.pangenome.api.VariantUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.api;
import com.google.common.collect.ImmutableMap;
import net.maizegenetics.pangenome.db_loading.DBLoadingUtils;
import net.maizegenetics.util.Tuple;
import org.apache.log4j.Logger;
import java.io.File;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Static helpers for resolving gvcf file ids (rows of the genome_file_data table)
 * to gvcf file paths on the local machine.
 *
 * The class keeps a single static cache mapping gvcf id to local file path. The cache
 * starts out empty and is replaced wholesale by {@link #setGvcfRemoteToLocalFiles(Map)}
 * or {@link #setupGVCFRemoteToLocalFiles(Connection, String)}.
 */
public class VariantUtils {

    private static final Logger myLogger = Logger.getLogger(VariantUtils.class);

    // Cache of the gvcf file paths: gvcfId -> local gvcf file path.
    // Only ever assigned an immutable map (emptyMap or an ImmutableMap copy).
    private static Map<Integer, String> gvcfRemoteToLocalFiles = Collections.emptyMap();

    /**
     * This method returns the gvcf file path for the given gvcfId.
     *
     * @param gvcfId - the id of the gvcf file; -1 is treated as "no gvcf"
     *
     * @return the cached path for gvcfId, or null when gvcfId is -1 or is not in the cache
     */
    public static String gvcfFilePath(int gvcfId) {
        if (gvcfId == -1) {
            return null;
        }
        return gvcfRemoteToLocalFiles.get(gvcfId);
    }

    /**
     * Replaces the cached gvcfId to file path map with an immutable copy of the given map.
     * Later changes to the supplied map do not affect the cache.
     *
     * @param gvcfRemoteToLocalFiles - map of gvcfId to gvcf file path; ImmutableMap.copyOf
     *                               rejects a null map and null keys or values
     */
    public static void setGvcfRemoteToLocalFiles(Map<Integer, String> gvcfRemoteToLocalFiles) {
        VariantUtils.gvcfRemoteToLocalFiles = ImmutableMap.copyOf(gvcfRemoteToLocalFiles);
    }

    /**
     * This method returns all the gvcf file entries from the genome_file_data table.
     * These are the entries whose type equals GenomeFileType.GVCF (noted by the original
     * authors as type=2; type=1 are assemblies).
     *
     * @param database - the database connection
     *
     * @return an immutable map of gvcfId to "genome_path/genome_file" as stored in the db
     *
     * @throws IllegalStateException if the query fails; the underlying exception is the cause
     */
    private static Map<Integer, String> gvcfIdsToGvcfFileMap(Connection database) {
        String query = "SELECT id, genome_path, genome_file from genome_file_data where type=" + DBLoadingUtils.GenomeFileType.GVCF.getValue();
        ImmutableMap.Builder<Integer, String> result = new ImmutableMap.Builder<>();

        // Both the Statement and the ResultSet are declared as resources so that
        // the Statement is closed too, not just the ResultSet.
        try (Statement statement = database.createStatement();
             ResultSet rs = statement.executeQuery(query)) {
            while (rs.next()) {
                int gvcfId = rs.getInt("id");
                String genome_path = rs.getString("genome_path");
                String file = rs.getString("genome_file");
                // genome_path is expected to be a semi-colon separated server and path,
                // e.g. myserver.com;/path/to/gvcfs, so the combined value has the form
                // myserver.com;/path/to/gvcfs/file
                String serverFilePath = genome_path + "/" + file;
                result.put(gvcfId, serverFilePath);
            }
        } catch (SQLException se) {
            // This helps debug when queries have a problem: log every exception in the chain.
            // A separate cursor is used so that "se" is still available as the cause below.
            int count = 1;
            for (SQLException chained = se; chained != null; chained = chained.getNextException()) {
                myLogger.error("SQLException " + count);
                myLogger.error("Code: " + chained.getErrorCode());
                myLogger.error("SqlState: " + chained.getSQLState());
                myLogger.error("Error Message: " + chained.getMessage());
                count++;
            }
            throw new IllegalStateException("error querying genome_file_data ", se);
        } catch (Exception exc) {
            myLogger.error(exc.getMessage(), exc);
            throw new IllegalStateException("VariantUtils: gvcfIdsToGvcfFileMap: Error querying PHG db for genome_file_data:" + exc.getMessage(), exc);
        }
        return result.build();
    }

    /**
     * This method takes a String of the form server;path and returns
     * a tuple with Tuple.x=server and Tuple.y=path. The split happens at the
     * first semi-colon; any later semi-colons remain part of the path.
     *
     * @param genomePath - semi-colon separated server and path, e.g. server;/path/to/file
     *
     * @return a Tuple of (server, path)
     *
     * @throws IllegalArgumentException if genomePath contains no semi-colon
     */
    private static Tuple<String, String> splitGenomePath(String genomePath) {
        int semiIndex = genomePath.indexOf(";");
        if (semiIndex < 0) {
            throw new IllegalArgumentException("genome path variable must be a semi-colon separated string, with the first portion indicating the server address, e.g. server;/path/to/file. Error on genomePath: " + genomePath);
        }
        String server = genomePath.substring(0, semiIndex);
        String path = genomePath.substring(semiIndex + 1);
        return new Tuple<>(server, path);
    }

    /**
     * takes the remote gvcf file paths as stored in the db genome_file_data table,
     * and translates them to a local path based on the user provided localGVCFFolder value.
     * Only the file name of each remote path is kept; it is appended to localGVCFFolder
     * with a "/" separator. The result replaces the cached gvcfId to local file map.
     *
     * @param database - the database connection
     * @param localGVCFFolder - local folder expected to hold the gvcf files
     *
     * @throws IllegalArgumentException if database is null, if localGVCFFolder is null or empty,
     *                                  or if a stored path has no semi-colon separator
     * @throws IllegalStateException if querying the database fails
     */
    public static void setupGVCFRemoteToLocalFiles(Connection database, String localGVCFFolder) {
        if (database == null) {
            throw new IllegalArgumentException("VariantUtils: setupGVCFRemoteToLocalFiles: database cannot be null");
        }
        if (localGVCFFolder == null || localGVCFFolder.isEmpty()) {
            throw new IllegalArgumentException("VariantUtils: setupGVCFRemoteToLocalFiles: localGVCFFolder cannot be null or empty");
        }

        Map<Integer, String> gvcfIdToRemotePath = gvcfIdsToGvcfFileMap(database);

        // Keys come from a map, so they are unique and the builder cannot see duplicates.
        ImmutableMap.Builder<Integer, String> idToLocalFile = new ImmutableMap.Builder<>();
        for (Map.Entry<Integer, String> entry : gvcfIdToRemotePath.entrySet()) {
            Tuple<String, String> serverPath = splitGenomePath(entry.getValue());
            // Strip the remote directories: only the file name is reused locally.
            String justName = new File(serverPath.getY()).getName();
            String localPathName = localGVCFFolder + "/" + justName;
            idToLocalFile.put(entry.getKey(), localPathName);
        }
        gvcfRemoteToLocalFiles = idToLocalFile.build();
    }

    /**
     * This method returns the gvcfRemoteToLocalFiles map
     *
     * @return the current immutable map of gvcfId to local gvcf file path
     */
    public static Map<Integer, String> getGvcfRemoteToLocalFiles() {
        return gvcfRemoteToLocalFiles;
    }
}