All Downloads are FREE. The search and download functionality uses the official Maven repository.

net.maizegenetics.pangenome.pipelineTests.GenerateRForPHG Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.pipelineTests;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;

import org.apache.log4j.Logger;

import net.maizegenetics.pangenome.api.HaplotypeGraph;
import net.maizegenetics.pangenome.api.HaplotypeGraphBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeGraphStreamBuilderPlugin;
import net.maizegenetics.pangenome.api.HaplotypeNode;
import net.maizegenetics.pangenome.api.HaplotypeNode.VariantInfo;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.taxa.Taxon;

/**
 * This class holds methods to generate R code for PHG classes.
 * It also creates vectors (parallel arrays) that hold PHG database table
 * information extracted from a {@link HaplotypeGraph}.
 *
 * All members are static; the class cannot be instantiated.
 *
 * @author lcj34
 *
 */

public class GenerateRForPHG {

    private static final Logger myLogger = Logger.getLogger(GenerateRForPHG.class);

    /** Private constructor: this is a static utility class. */
    private GenerateRForPHG() {

    }

    /**
     * Prints the R script header to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printHeader();
        //GenerateRCode.generate(HaplotypeGraphBuilderPlugin.class, "configFile", "haplotypeGraph");
        //GenerateRCode.generate(HaplotypeGraphStreamBuilderPlugin.class, "configFile", "haplotypeGraph");
//        generate(FilterTaxaBuilderPlugin.class, "genotypeTable");
//        generate(KinshipPlugin.class, "genotypeTable");
    }

    /**
     * Writes the preamble of the generated R script to standard output:
     * the Rscript shebang, a banner containing the current date, installation
     * and loading of rJava through BiocManager, JVM initialisation, the
     * TASSEL 5 class path entries and the sourcing of R/AllClasses.R.
     */
    private static void printHeader() {

        System.out.println("#!/usr/bin/env Rscript");

        System.out.println("\n#--------------------------------------------------------------------");
        System.out.println("# Script Name:   TasselPluginWrappers.R");
        System.out.println("# Description:   Generated R interface to TASSEL 5");
        System.out.println("# Author:        Brandon Monier, Ed Buckler, Terry Casstevens");
        System.out.print("# Created:       ");
        System.out.println(new Date());
        System.out.println("#--------------------------------------------------------------------");

        System.out.println("# Preamble\n");

        System.out.println("\n## Load packages");
        System.out.println("if (!requireNamespace(\"BiocManager\")) {");
        System.out.println("    install.packages(\"BiocManager\")");
        System.out.println("}");

        System.out.println("\npackages <- c(");
        System.out.println("\"rJava\"");
        System.out.println(")");
        System.out.println("BiocManager::install(packages)");
        System.out.println("library(rJava)");

        System.out.println("\n## Init JVM");
        System.out.println("rJava::.jinit()");

        System.out.println("\n## Add TASSEL 5 class path");
        System.out.println("rJava::.jaddClassPath(\"/tassel-5-standalone/lib\")");
        System.out.println("rJava::.jaddClassPath(\"/tassel-5-standalone/sTASSEL.jar\")\n");

        System.out.println("source(\"R/AllClasses.R\")");

    }

    /**
     * Given a PHG HaplotypeGraph, create a set of vectors
     * representing data for all ReferenceRanges in the graph.
     *
     * If the user passes a list of refRangeIds, then only return
     * data for those reference ranges.  If this list is null or empty,
     * return data for all reference ranges in the graph.
     *
     * Rows appear in the order of {@code graph.referenceRangeList()}.
     * Requested ids that are not present in the graph (or that are repeated
     * in {@code refRanges}) do not produce rows: every returned vector has
     * exactly one entry per reference range that was found.
     *
     * @param graph the graph to read reference ranges and node counts from
     * @param refRanges reference range ids to keep; null or empty keeps all
     * @return parallel vectors of id, chromosome name, start, end, reference
     *         name and number of nodes for each selected reference range
     */
    public static RefRangeVectors graphToRefRangeVectors(HaplotypeGraph graph, int[] refRanges) {
        // Get all the reference ranges from the graph
        List<ReferenceRange> referenceRanges = graph.referenceRangeList();

        // A null or empty filter means "use every reference range".
        boolean useList = refRanges != null && refRanges.length > 0;

        // Convert int[] to a Set to facilitate constant-time contains()
        Set<Integer> requestedIds = useList
                ? Arrays.stream(refRanges).boxed().collect(Collectors.toSet())
                : null;

        // Upper bound on the number of rows; trimmed below if fewer are found.
        int capacity = useList
                ? Math.min(refRanges.length, referenceRanges.size())
                : referenceRanges.size();

        int[] refRangeIds = new int[capacity];
        String[] chromosomes = new String[capacity];
        int[] startPos = new int[capacity];
        int[] endPos = new int[capacity];
        String[] refLineName = new String[capacity];
        int[] numberOfNodes = new int[capacity];

        // Grab data from the graph reference ranges to populate RefRangeVectors.
        int count = 0;
        for (ReferenceRange refRange : referenceRanges) {
            if (count >= capacity) {
                break; // every available slot has been filled
            }
            if (useList && !requestedIds.contains(refRange.id())) {
                continue; // skipping this one
            }

            refRangeIds[count] = refRange.id();
            chromosomes[count] = refRange.chromosome().getName();
            startPos[count] = refRange.start();
            endPos[count] = refRange.end();
            refLineName[count] = refRange.referenceName();
            numberOfNodes[count] = graph.nodes(refRange).size();
            count++;
        }

        // Drop unused trailing slots so callers never see padding rows
        // (id 0 / null chromosome) for ids that were not in the graph.
        if (count < capacity) {
            refRangeIds = Arrays.copyOf(refRangeIds, count);
            chromosomes = Arrays.copyOf(chromosomes, count);
            startPos = Arrays.copyOf(startPos, count);
            endPos = Arrays.copyOf(endPos, count);
            refLineName = Arrays.copyOf(refLineName, count);
            numberOfNodes = Arrays.copyOf(numberOfNodes, count);
        }

        return new RefRangeVectors(refRangeIds, chromosomes, startPos, endPos, refLineName, numberOfNodes);
    }

    /**
     * Given a PHG HaplotypeGraph, create a set of vectors
     * representing data for all Haplotypes/Nodes in the graph.
     *
     * This assumes the graph was created with sequence and variant info if
     * the user has these 2 parameters as "true".
     *
     * The method id vector is filled with -1 because HaplotypeNode does not
     * hold a method id.  Taxon names of a node are joined with commas; a node
     * with no taxa yields an empty string.
     *
     * @param graph the graph whose nodes are exported
     * @param refRanges reference range ids to keep; null or empty keeps all
     * @param includeSequence boolean - only include the sequence if true, otherwise ""
     * @param includeVariants boolean - only include variant info (as a JSON string)
     *        if true and the node has variant info, otherwise ""
     * @return parallel vectors of haplotype id, reference range id, method id,
     *         taxa, sequence and variant info for each selected node
     */
    public static HaplotypesDataVectors graphToHapsInRefRangeVectors(HaplotypeGraph graph, int[] refRanges, boolean includeSequence, boolean includeVariants) {

        // Convert int[] to a Set to facilitate contains(); null means "no filter".
        final Set<Integer> requestedIds = (refRanges == null || refRanges.length == 0)
                ? null
                : Arrays.stream(refRanges).boxed().collect(Collectors.toSet());

        // Filter nodes based on user requested reference ranges:
        // keep a node if it is on the list, or there is no list.
        List<HaplotypeNode> hapNodeList = graph.nodeStream()
                .filter(node -> requestedIds == null || requestedIds.contains(node.referenceRange().id()))
                .collect(Collectors.toList());

        int numberOfNodes = hapNodeList.size();

        int[] hapIds = new int[numberOfNodes];
        int[] refRangeIds = new int[numberOfNodes];
        int[] methodIds = new int[numberOfNodes];
        String[] taxa = new String[numberOfNodes];
        String[] sequence = new String[numberOfNodes];
        String[] variantInfo = new String[numberOfNodes];

        // Process nodes to create vectors
        for (int idx = 0; idx < numberOfNodes; idx++) {

            HaplotypeNode hapNode = hapNodeList.get(idx);

            hapIds[idx] = hapNode.id();
            refRangeIds[idx] = hapNode.referenceRange().id();
            methodIds[idx] = -1; // HaplotypeNode doesn't hold method id !!

            // Create a String of comma separated taxon names
            List<Taxon> tList = hapNode.taxaList();
            taxa[idx] = tList.stream().map(Taxon::getName).collect(Collectors.joining(","));

            sequence[idx] = includeSequence ? hapNode.haplotypeSequence().sequence() : "";

            // create variantInfo String if requested
            variantInfo[idx] = includeVariants
                    ? hapNode.variantInfos().map(GenerateRForPHG::getVariantInfoString).orElse("")
                    : ""; // should this be an empty json object ??
        }

        return new HaplotypesDataVectors(hapIds, refRangeIds, methodIds, taxa, sequence, variantInfo);
    }

    /**
     * Create the string form of a json object that contains, under the key
     * "variantInfos", an array with one json object per VariantInfo.
     *
     * @param variantInfos the variant infos to serialise
     * @return the json object rendered with {@code toString()}
     */
    private static String getVariantInfoString(List<HaplotypeNode.VariantInfo> variantInfos) {

        JsonArrayBuilder vinfoArray = Json.createArrayBuilder();
        for (HaplotypeNode.VariantInfo info : variantInfos) {
            vinfoArray.add(createVinfoObject(info));
        }
        JsonObject vinfoArrayObject = Json.createObjectBuilder().add("variantInfos", vinfoArray).build();
        return vinfoArrayObject.toString();
    }

    /**
     * Build a json object from a single VariantInfo with the keys
     * chromosome, start, end, genotypeString, refAllele, altAllele and isVariant.
     *
     * @param vinfo the variant info to convert
     * @return the json representation of {@code vinfo}
     */
    public static JsonObject createVinfoObject(VariantInfo vinfo) {
        return Json.createObjectBuilder()
                .add("chromosome", vinfo.chromosome())
                .add("start", vinfo.start())
                .add("end", vinfo.end())
                .add("genotypeString", vinfo.genotypeString())
                .add("refAllele", vinfo.refAlleleString())
                .add("altAllele", vinfo.altAlleleString())
                .add("isVariant", vinfo.isVariant())
                .build();
    }

}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy