All downloads are free. Search and download functionality uses the official Maven repository.

umcg.genetica.ncbi.dbsnp.SNPAnnotation Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.ncbi.dbsnp;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.Set;
import java.util.HashSet;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.text.Strings;

/**
 *
 * @author harmjan
 */
public class SNPAnnotation implements java.io.Serializable {

    public HashMap rsToChrPos = new HashMap();
    private HashSet contigs;
    private HashMap contigChr;
    private HashMap contigOrientation;
    public HashMap> chrToSNP = new HashMap>();
    public HashMap> chrToUniquePositions = new HashMap>();
    Integer[][] rsToRs = null;

    public SNPAnnotation(String contigInfoLoc, String snpInfoLoc) throws IOException {
        loadContigInfo(contigInfoLoc);
        loadSNPAnnotation(snpInfoLoc);
    }

    public SNPAnnotation(String contigInfoLoc, String snpInfoLoc, String ref) throws IOException {
        loadContigInfo(contigInfoLoc, ref);
        loadSNPAnnotation(snpInfoLoc);
    }

    private void loadContigInfo(String location) throws IOException {
        System.out.println("Loading contigs from: " + location);
        contigs = new HashSet();
        contigChr = new HashMap();
        contigOrientation = new HashMap();

        TextFile in = new TextFile(location, TextFile.R);

        String[] elems = in.readLineElemsReturnObjects(TextFile.tab);
        int ctr = 0;
        while (elems != null) {
            contigs.add(elems[0]);
            contigChr.put(elems[0], elems[5]);
            System.out.println(elems[0] + "\t" + elems[5]);
            contigOrientation.put(elems[0], elems[8]);
            elems = in.readLineElemsReturnObjects(TextFile.tab);
        }
        in.close();

    }

    private void loadContigInfo(String location, String buildString) throws IOException {
        System.out.println("Loading contigs from: " + location);
        contigs = new HashSet();
        contigChr = new HashMap();
        contigOrientation = new HashMap();

        TextFile in = new TextFile(location, TextFile.R);

        String[] elems = in.readLineElemsReturnObjects(TextFile.tab);
        int ctr = 0;
        while (elems != null) {
            if (elems[12].equals(buildString)) {
//		if(elems[1].equals()){
//
//		}


                contigs.add(elems[0]);
                contigChr.put(elems[0], elems[5]);
                System.out.println(elems[0] + "\t" + elems[5]);
                contigOrientation.put(elems[0], elems[8]);
            }


            elems = in.readLineElemsReturnObjects(TextFile.tab);
        }
        in.close();

    }

    private void loadSNPAnnotation(String loc) throws IOException {
        System.out.println("Loading SNP Annotation from: " + loc);

        int linecounter = 0;
        TextFile in = new TextFile(loc, TextFile.R);


        int ctr = 0;
        String[] elems = in.readLineElemsReturnObjects(TextFile.tab);
        while (elems != null) {

            String contig = elems[2];
            if (contigs.contains(contig)) {
                String chrStr = contigChr.get(contig);
                Integer chrPos = null;
                try {
                    chrPos = Integer.parseInt(elems[10]);
                } catch (Exception e) {
//		    System.out.println("Cannot parse chr pos: "+elems[10]+"\t"+Strings.concat(elems, Strings.tab));
                }

                try {

                    Integer RSnum = Integer.parseInt(elems[1]);
                    ArrayList chrSNPs = chrToSNP.get(chrStr);
                    HashSet uniquePositions = chrToUniquePositions.get(chrStr);

                    if (uniquePositions == null) {
                        uniquePositions = new HashSet();
                    }

                    if (chrSNPs == null) {
                        chrSNPs = new ArrayList();
                    }

                    chrSNPs.add(RSnum);

                    if (rsToChrPos.get(RSnum) != null) {
                        rsToChrPos.put(RSnum, -1);
                    } else {

                        rsToChrPos.put(RSnum, chrPos);
                    }

                    if (chrPos != null) {
                        uniquePositions.add(chrPos);
                    }

                    chrToUniquePositions.put(chrStr, uniquePositions);
                    chrToSNP.put(chrStr, chrSNPs);
                } catch (java.lang.ArrayIndexOutOfBoundsException u) {
                    System.out.println(Strings.concat(elems, Strings.tab));
                }
            }

//		if (linecounter % 100000 == 0) {
//		    System.out.println(linecounter + " lines parsed");
//		}
            linecounter++;
            elems = in.readLineElemsReturnObjects(TextFile.tab);
        }
        in.close();

        int numTotalPositions = 0;
        Set< Entry>> it = chrToUniquePositions.entrySet();
        for (Entry> e : it) {
            if (e.getValue() != null) {
                numTotalPositions += e.getValue().size();
            }
        }

        System.out.println(numTotalPositions + " positions loaded");

    }

    public HashSet getContigs() {
        return contigs;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy