All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.CompareFastaToReference Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome;

/**
 * Created by terry on 3/22/17.
 */

import net.maizegenetics.util.Utils;

import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Created by terry on 3/21/17.
 */
public class CompareFastaToReference {

    public static void main(String[] args) {

        String haplotypes = args[0];
        String reference = args[1];

        Map refMap = new HashMap<>();

        try (BufferedReader readRef = Utils.getBufferedReader(reference)) {

            String refLine = readRef.readLine();
            while (refLine != null) {

                if (!refLine.startsWith(">")) {
                    throw new IllegalStateException("refLine should start with >");
                }

                String[] refHeader = refLine.split(" ");
                String refName = refHeader[0].substring(1);

                StringBuilder refSeq = new StringBuilder();
                refLine = readRef.readLine();
                while (refLine != null && !refLine.startsWith(">")) {
                    refSeq.append(refLine);
                    refLine = readRef.readLine();
                }

                System.out.println(refName);
                refMap.put(refName, refSeq.toString());

                if (refName.equals("8")) {
                    String ref = refMap.get(refName);
                    for (int i = 173825120; i < 173856877; i++) {
                        System.out.print(ref.charAt(i));
                    }
                }

            }

        } catch (Exception e) {
            e.printStackTrace();
        }

        try (BufferedReader readHaplotypes = Utils.getBufferedReader(haplotypes)) {

            String hapLine = readHaplotypes.readLine();
            while (hapLine != null) {

                if (!hapLine.startsWith(">")) {
                    throw new IllegalStateException("hapLine should start with >");
                }
                String header = hapLine;
                String[] hapHeader = hapLine.split(" ");
                String hapName = hapHeader[0].substring(1);
                String[] chrPos = hapHeader[1].split(":");
                String chr = chrPos[0];
                int startPos = Integer.parseInt(chrPos[1]) - 1;

                StringBuilder hapSeq = new StringBuilder();
                hapLine = readHaplotypes.readLine();
                while (hapLine != null && !hapLine.startsWith(">")) {
                    hapSeq.append(hapLine);
                    hapLine = readHaplotypes.readLine();
                }

                String refSeq = refMap.get(chr);
                if (refSeq == null) {
                    System.out.println("No reference sequence for: " + chr);
                }
                int numSame = 0;
                for (int i = 0; i < hapSeq.length(); i++) {
                    if (hapSeq.charAt(i) == refSeq.charAt(i + startPos)) {
                        numSame++;
                    } else if (startPos == 173825120 && i < 1100) {
                        System.out.println(i + " different hapSeq: " + hapSeq.charAt(i) + " refSeq: " + refSeq.charAt(i + startPos));
                    }
                }

                double percentSame = (double) numSame / (double) hapSeq.length();
                System.out.println(header + ": " + percentSame + " start position: " + startPos);
                if (percentSame < 1.0) {
                    System.out.println(hapSeq.substring(0, 40));
                    System.out.println(refSeq.substring(startPos, startPos + 40));
                }

            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy