/*
 * net.maizegenetics.pangenome.pipelineTests.GenerateRForPHG
 * PHG - Practical Haplotype Graph
 */
package net.maizegenetics.pangenome.pipelineTests;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;
import org.apache.log4j.Logger;
import net.maizegenetics.pangenome.api.HaplotypeGraph;
import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeGraphStreamBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.api.HaplotypeNode.VariantInfo;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.taxa.Taxon;
/**
 * Static helpers that generate R code for PHG classes and flatten a
 * {@link HaplotypeGraph} into parallel arrays ("vectors") holding PHG
 * database table information.
 *
 * The class is not instantiable; every member is static.
 *
 * @author lcj34
 */
public class GenerateRForPHG {

    private static final Logger myLogger = Logger.getLogger(GenerateRForPHG.class);

    /** Utility class: no instances. */
    private GenerateRForPHG() {
    }

    /**
     * Prints the R script preamble to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printHeader();
        //GenerateRCode.generate(HaplotypeGraphBuilderPlugin.class, "configFile", "haplotypeGraph");
        //GenerateRCode.generate(HaplotypeGraphStreamBuilderPlugin.class, "configFile", "haplotypeGraph");
        // generate(FilterTaxaBuilderPlugin.class, "genotypeTable");
        // generate(KinshipPlugin.class, "genotypeTable");
    }

    /**
     * Writes the fixed header of the generated R script to standard output:
     * shebang, banner with the current date, package loading, JVM init,
     * class path setup and the {@code source()} of the R class definitions.
     */
    private static void printHeader() {
        System.out.println("#!/usr/bin/env Rscript");
        System.out.println("\n#--------------------------------------------------------------------");
        System.out.println("# Script Name: TasselPluginWrappers.R");
        System.out.println("# Description: Generated R interface to TASSEL 5");
        System.out.println("# Author: Brandon Monier, Ed Buckler, Terry Casstevens");
        System.out.print("# Created: ");
        System.out.println(new Date());
        System.out.println("#--------------------------------------------------------------------");
        System.out.println("# Preamble\n");
        System.out.println("\n## Load packages");
        System.out.println("if (!requireNamespace(\"BiocManager\")) {");
        System.out.println(" install.packages(\"BiocManager\")");
        System.out.println("}");
        System.out.println("\npackages <- c(");
        System.out.println("\"rJava\"");
        System.out.println(")");
        System.out.println("BiocManager::install(packages)");
        System.out.println("library(rJava)");
        System.out.println("\n## Init JVM");
        System.out.println("rJava::.jinit()");
        System.out.println("\n## Add TASSEL 5 class path");
        System.out.println("rJava::.jaddClassPath(\"/tassel-5-standalone/lib\")");
        System.out.println("rJava::.jaddClassPath(\"/tassel-5-standalone/sTASSEL.jar\")\n");
        System.out.println("source(\"R/AllClasses.R\")");
    }

    /**
     * Converts the caller-supplied reference range ids into a set for
     * constant-time membership tests.
     *
     * @param refRanges requested reference range ids; may be null or empty
     * @return the distinct ids, or an empty set when no filter was requested
     */
    private static Set<Integer> requestedRangeIds(int[] refRanges) {
        if (refRanges == null) {
            return Arrays.stream(new int[0]).boxed().collect(Collectors.toSet());
        }
        return Arrays.stream(refRanges).boxed().collect(Collectors.toSet());
    }

    /**
     * Given a PHG HaplotypeGraph, creates a set of vectors representing data
     * for the ReferenceRanges in the graph.
     *
     * If the caller passes reference range ids, only data for those ranges
     * is returned. If the array is null or empty, data for all reference
     * ranges in the graph is returned. Entries appear in the order of
     * {@code graph.referenceRangeList()}; nothing is re-sorted here.
     *
     * Every returned vector has exactly one entry per matching reference
     * range, so requested ids that are duplicated or absent from the graph
     * do not produce padding entries.
     *
     * @param graph the graph to read reference ranges and node counts from
     * @param refRanges reference range ids to keep; null or empty means all
     * @return the parallel vectors describing each selected reference range
     */
    public static RefRangeVectors graphToRefRangeVectors(HaplotypeGraph graph, int[] refRanges) {
        Set<Integer> requestedIds = requestedRangeIds(refRanges);
        boolean useList = !requestedIds.isEmpty();

        // Select first, then size the arrays from what actually matched.
        List<ReferenceRange> selected = new ArrayList<>();
        for (ReferenceRange refRange : graph.referenceRangeList()) {
            if (!useList || requestedIds.contains(refRange.id())) {
                selected.add(refRange);
            }
        }

        int numberOfRanges = selected.size();
        int[] refRangeIds = new int[numberOfRanges];
        String[] chromosomes = new String[numberOfRanges];
        int[] startPos = new int[numberOfRanges];
        int[] endPos = new int[numberOfRanges];
        String[] refLineName = new String[numberOfRanges];
        int[] numberOfNodes = new int[numberOfRanges];

        for (int idx = 0; idx < numberOfRanges; idx++) {
            ReferenceRange refRange = selected.get(idx);
            refRangeIds[idx] = refRange.id();
            chromosomes[idx] = refRange.chromosome().getName();
            startPos[idx] = refRange.start();
            endPos[idx] = refRange.end();
            refLineName[idx] = refRange.referenceName();
            numberOfNodes[idx] = graph.nodes(refRange).size();
        }
        return new RefRangeVectors(refRangeIds, chromosomes, startPos, endPos, refLineName, numberOfNodes);
    }

    /**
     * Given a PHG HaplotypeGraph, creates a set of vectors representing data
     * for the Haplotypes/Nodes in the graph.
     *
     * This assumes the graph was created with sequence and variant info if
     * the caller passes the corresponding flags as {@code true}.
     *
     * The method id vector is filled with -1 because HaplotypeNode does not
     * hold a method id. The taxa entry is the node's taxon names joined with
     * commas (an empty string when the node has no taxa). The variant entry
     * is a JSON object string, or an empty string when variants were not
     * requested or the node has none.
     *
     * @param graph the graph whose nodes are exported
     * @param refRanges reference range ids to keep; null or empty means all
     * @param includeSequence include the haplotype sequence only if true
     * @param includeVariants include the variant info JSON only if true
     * @return the parallel vectors describing each selected haplotype node
     */
    public static HaplotypesDataVectors graphToHapsInRefRangeVectors(HaplotypeGraph graph, int[] refRanges, boolean includeSequence, boolean includeVariants) {
        Set<Integer> requestedIds = requestedRangeIds(refRanges);
        boolean useList = !requestedIds.isEmpty();

        // Keep a node when no filter was given or its reference range was requested.
        List<HaplotypeNode> hapNodeList = graph.nodeStream()
                .filter(node -> !useList || requestedIds.contains(node.referenceRange().id()))
                .collect(Collectors.toList());

        int numberOfNodes = hapNodeList.size();
        int[] hapIds = new int[numberOfNodes];
        int[] refRangeIds = new int[numberOfNodes];
        int[] methodIds = new int[numberOfNodes];
        String[] taxa = new String[numberOfNodes];
        String[] sequence = new String[numberOfNodes];
        String[] variantInfo = new String[numberOfNodes];

        for (int idx = 0; idx < numberOfNodes; idx++) {
            HaplotypeNode hapNode = hapNodeList.get(idx);
            hapIds[idx] = hapNode.id();
            refRangeIds[idx] = hapNode.referenceRange().id();
            methodIds[idx] = -1; // HaplotypeNode doesn't hold method id !!

            // Comma separated taxon names; joining tolerates an empty taxa list.
            List<Taxon> tList = hapNode.taxaList();
            taxa[idx] = tList.stream().map(Taxon::getName).collect(Collectors.joining(","));

            sequence[idx] = includeSequence ? hapNode.haplotypeSequence().sequence() : "";

            // Variant info is only serialized when requested and present.
            Optional<List<VariantInfo>> optVarList = hapNode.variantInfos();
            if (includeVariants && optVarList.isPresent()) {
                variantInfo[idx] = getVariantInfoString(optVarList.get());
            } else {
                variantInfo[idx] = ""; // should this be an empty json object ??
            }
        }
        return new HaplotypesDataVectors(hapIds, refRangeIds, methodIds, taxa, sequence, variantInfo);
    }

    /**
     * Serializes variant infos as a JSON object with a single
     * {@code "variantInfos"} array, one element per variant.
     *
     * @param variantInfos the variants to serialize
     * @return the JSON object rendered as a string
     */
    private static String getVariantInfoString(List<VariantInfo> variantInfos) {
        JsonArrayBuilder vinfoArray = Json.createArrayBuilder();
        for (VariantInfo info : variantInfos) {
            vinfoArray.add(createVinfoObject(info));
        }
        JsonObject vinfoArrayObject = Json.createObjectBuilder().add("variantInfos", vinfoArray).build();
        return vinfoArrayObject.toString();
    }

    /**
     * Builds the JSON representation of one variant.
     *
     * @param vinfo the variant to convert
     * @return a JSON object with the keys chromosome, start, end,
     *         genotypeString, refAllele, altAllele and isVariant
     */
    public static JsonObject createVinfoObject(VariantInfo vinfo) {
        return Json.createObjectBuilder()
                .add("chromosome", vinfo.chromosome())
                .add("start", vinfo.start())
                .add("end", vinfo.end())
                .add("genotypeString", vinfo.genotypeString())
                .add("refAllele", vinfo.refAlleleString())
                .add("altAllele", vinfo.altAlleleString())
                .add("isVariant", vinfo.isVariant())
                .build();
    }
}