All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.cellbase.build.transform.CaddParser Maven / Gradle / Ivy

The newest version!
package org.opencb.cellbase.build.transform;

import org.broad.tribble.readers.TabixReader;

import org.opencb.cellbase.core.serializer.CellBaseSerializer;
import org.opencb.cellbase.build.transform.formats.Cadd;

import java.io.*;
import java.nio.file.Path;

/**
 * Reads the records of one chromosome from a tabix-indexed CADD file and hands
 * them to the serializer as {@link Cadd} objects.
 *
 * <p>Each record is a tab-separated line. Consecutive lines that share the same
 * chromosome, position, reference and alternate are merged into a single
 * {@link Cadd} object: the first line creates the object and every following
 * line of the same variant only contributes an extra set of values through
 * {@code Cadd.addCaddValues}.
 *
 * @author Antonio Rueda
 * @author Luis Miguel Cruz
 * @since October 08, 2014
 */
public class CaddParser extends CellBaseParser {

    /** Value used in the input file to mark a missing field. */
    private static final String MISSING_VALUE = "NA";

    // Zero-based column indices of the tab-separated CADD record.
    private static final int CHROMOSOME_COLUMN = 0;
    private static final int POSITION_COLUMN = 1;
    private static final int REFERENCE_COLUMN = 2;
    private static final int ALTERNATE_COLUMN = 4;

    private static final int ENC_EXP_COLUMN = 29;
    private static final int ENC_H3K27AC_COLUMN = 30;
    private static final int ENC_H3K4ME1_COLUMN = 31;
    private static final int ENC_H3K4ME3_COLUMN = 32;
    private static final int ENC_NUCLEO_COLUMN = 33;
    private static final int ENC_OCC_COLUMN = 34;
    private static final int ENC_OC_COMB_PVAL_COLUMN = 35;
    private static final int ENC_OC_DNASE_PVAL_COLUMN = 36;
    private static final int ENC_OC_FAIRE_PVAL_COLUMN = 37;
    private static final int ENC_OC_POLII_PVAL_COLUMN = 38;
    private static final int ENC_OC_CTCF_PVAL_COLUMN = 39;
    private static final int ENC_OC_MYC_PVAL_COLUMN = 40;
    private static final int ENC_OC_DNASE_SIG_COLUMN = 41;
    private static final int ENC_OC_FAIRE_SIG_COLUMN = 42;
    private static final int ENC_OC_POLII_SIG_COLUMN = 43;
    private static final int ENC_OC_CTCF_SIG_COLUMN = 44;
    private static final int ENC_OC_MYC_SIG_COLUMN = 45;

    // Columns forwarded to Cadd.addCaddValues(float, float, String), in that order.
    // NOTE(review): presumably the two CADD scores and a per-row annotation label;
    // confirm the meaning against the header of the CADD file being loaded.
    private static final int FIRST_VALUE_COLUMN = 88;
    private static final int SECOND_VALUE_COLUMN = 89;
    private static final int TEXT_VALUE_COLUMN = 68;

    private final Path caddFilePath;
    private final String chrName;

    /**
     * @param caddFilePath path of the tabix-indexed CADD file to read
     * @param chrName      region string passed to the tabix query (the chromosome to parse)
     * @param serializer   destination of the parsed {@link Cadd} objects
     */
    public CaddParser(Path caddFilePath, String chrName, CellBaseSerializer serializer) {
        super(serializer);
        this.caddFilePath = caddFilePath;
        this.chrName = chrName;
    }

    /**
     * Queries the CADD file for {@code chrName} and serializes one {@link Cadd}
     * object per run of consecutive lines describing the same variant.
     *
     * <p>An {@link IOException} or a line with too few columns stops the parsing;
     * the stack trace is printed and the method returns normally. Nothing is
     * serialized when the query produces no lines.
     */
    public void parse() {
        try {
            // NOTE(review): the reader is never closed here; confirm whether this
            // TabixReader version offers close() and release it if so.
            TabixReader inputReader = new TabixReader(caddFilePath.toString());
            TabixReader.Iterator caddIterator = inputReader.query(chrName);
            if (caddIterator == null) {
                // No iterator for the requested region: nothing to parse.
                return;
            }

            // Variant being accumulated; flushed when a different variant shows up.
            Cadd caddVariant = null;
            String line;
            while ((line = caddIterator.next()) != null) {
                String[] fields = line.split("\t");
                String chr = fields[CHROMOSOME_COLUMN];
                String ref = fields[REFERENCE_COLUMN];
                String alt = fields[ALTERNATE_COLUMN];
                int pos = Integer.parseInt(fields[POSITION_COLUMN]);

                if (sameVariant(caddVariant, chr, pos, ref, alt)) {
                    // Same variant as the previous line: only add this line's values.
                    addCaddValues(caddVariant, fields);
                } else {
                    if (caddVariant != null) {
                        serializer.serialize(caddVariant);
                    }
                    caddVariant = createCaddVariant(fields);
                }
            }

            // Flush the last pending variant. It is still null when the query
            // returned no lines, and a null record must not reach the serializer.
            if (caddVariant != null) {
                serializer.serialize(caddVariant);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            e.printStackTrace();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Tells whether {@code caddVariant} describes the given chromosome, position,
     * reference and alternate.
     *
     * @return {@code false} when {@code caddVariant} is {@code null} or any of the
     *         four values differs
     */
    private boolean sameVariant(Cadd caddVariant, String chr, int pos, String ref, String alt) {
        return caddVariant != null
                && caddVariant.getChromosome().equals(chr)
                && caddVariant.getStart() == pos
                && caddVariant.getReference().equals(ref)
                && caddVariant.getAlternate().equals(alt);
    }

    /**
     * Builds a new {@link Cadd} object from the columns of one record and adds
     * that record's values to it.
     *
     * @param fields columns of one tab-separated CADD line
     * @return the newly created variant
     */
    private Cadd createCaddVariant(String[] fields) {
        String chr = fields[CHROMOSOME_COLUMN];
        String ref = fields[REFERENCE_COLUMN];
        String alt = fields[ALTERNATE_COLUMN];
        int pos = Integer.parseInt(fields[POSITION_COLUMN]);

        Float encExp = stringToFloat(fields[ENC_EXP_COLUMN]);
        Float encH3K27Ac = stringToFloat(fields[ENC_H3K27AC_COLUMN]);
        Float encH3K4Me1 = stringToFloat(fields[ENC_H3K4ME1_COLUMN]);
        Float encH3K4Me3 = stringToFloat(fields[ENC_H3K4ME3_COLUMN]);
        Float encNucleo = stringToFloat(fields[ENC_NUCLEO_COLUMN]);

        Integer encOCC = stringToInteger(fields[ENC_OCC_COLUMN]);

        Float encOCCombPVal = stringToFloat(fields[ENC_OC_COMB_PVAL_COLUMN]);
        Float encOCDNasePVal = stringToFloat(fields[ENC_OC_DNASE_PVAL_COLUMN]);
        Float encOCFairePVal = stringToFloat(fields[ENC_OC_FAIRE_PVAL_COLUMN]);
        Float encOCpolIIPVal = stringToFloat(fields[ENC_OC_POLII_PVAL_COLUMN]);
        Float encOCctcfPVal = stringToFloat(fields[ENC_OC_CTCF_PVAL_COLUMN]);
        Float encOCmycPVal = stringToFloat(fields[ENC_OC_MYC_PVAL_COLUMN]);
        Float encOCDNaseSig = stringToFloat(fields[ENC_OC_DNASE_SIG_COLUMN]);
        Float encOCFaireSig = stringToFloat(fields[ENC_OC_FAIRE_SIG_COLUMN]);
        Float encOCpolIISig = stringToFloat(fields[ENC_OC_POLII_SIG_COLUMN]);
        Float encOCctcfSig = stringToFloat(fields[ENC_OC_CTCF_SIG_COLUMN]);
        Float encOCmycSig = stringToFloat(fields[ENC_OC_MYC_SIG_COLUMN]);

        // The position is passed twice, as in the original call to this constructor.
        Cadd caddVariant = new Cadd(
                alt, ref, chr, pos, pos, encExp, encH3K27Ac,
                encH3K4Me1, encH3K4Me3, encNucleo, encOCC,
                encOCCombPVal, encOCDNasePVal, encOCFairePVal,
                encOCpolIIPVal, encOCctcfPVal, encOCmycPVal,
                encOCDNaseSig, encOCFaireSig, encOCpolIISig,
                encOCctcfSig, encOCmycSig);

        addCaddValues(caddVariant, fields);
        return caddVariant;
    }

    /**
     * Adds the per-line values of one record (columns 88, 89 and 68) to the variant.
     *
     * @param caddVariant variant receiving the values
     * @param fields      columns of one tab-separated CADD line
     */
    private void addCaddValues(Cadd caddVariant, String[] fields) {
        caddVariant.addCaddValues(
                Float.parseFloat(fields[FIRST_VALUE_COLUMN]),
                Float.parseFloat(fields[SECOND_VALUE_COLUMN]),
                fields[TEXT_VALUE_COLUMN]);
    }

    /**
     * Converts a column to a {@link Float}.
     *
     * @return {@code null} when the column holds {@code "NA"}, the parsed value otherwise
     */
    private Float stringToFloat(String floatName) {
        if (MISSING_VALUE.equals(floatName)) {
            return null;
        }
        return Float.valueOf(floatName);
    }

    /**
     * Converts a column to an {@link Integer}.
     *
     * @return {@code null} when the column holds {@code "NA"}, the parsed value otherwise
     */
    private Integer stringToInteger(String integerName) {
        if (MISSING_VALUE.equals(integerName)) {
            return null;
        }
        return Integer.parseInt(integerName);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy