net.maizegenetics.pangenome.CompareFastaToReference Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome;
/**
* Created by terry on 3/22/17.
*/
import net.maizegenetics.util.Utils;
import java.io.BufferedReader;
import java.util.HashMap;
import java.util.Map;
/**
* Created by terry on 3/21/17.
*/
public class CompareFastaToReference {
public static void main(String[] args) {
String haplotypes = args[0];
String reference = args[1];
Map refMap = new HashMap<>();
try (BufferedReader readRef = Utils.getBufferedReader(reference)) {
String refLine = readRef.readLine();
while (refLine != null) {
if (!refLine.startsWith(">")) {
throw new IllegalStateException("refLine should start with >");
}
String[] refHeader = refLine.split(" ");
String refName = refHeader[0].substring(1);
StringBuilder refSeq = new StringBuilder();
refLine = readRef.readLine();
while (refLine != null && !refLine.startsWith(">")) {
refSeq.append(refLine);
refLine = readRef.readLine();
}
System.out.println(refName);
refMap.put(refName, refSeq.toString());
if (refName.equals("8")) {
String ref = refMap.get(refName);
for (int i = 173825120; i < 173856877; i++) {
System.out.print(ref.charAt(i));
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
try (BufferedReader readHaplotypes = Utils.getBufferedReader(haplotypes)) {
String hapLine = readHaplotypes.readLine();
while (hapLine != null) {
if (!hapLine.startsWith(">")) {
throw new IllegalStateException("hapLine should start with >");
}
String header = hapLine;
String[] hapHeader = hapLine.split(" ");
String hapName = hapHeader[0].substring(1);
String[] chrPos = hapHeader[1].split(":");
String chr = chrPos[0];
int startPos = Integer.parseInt(chrPos[1]) - 1;
StringBuilder hapSeq = new StringBuilder();
hapLine = readHaplotypes.readLine();
while (hapLine != null && !hapLine.startsWith(">")) {
hapSeq.append(hapLine);
hapLine = readHaplotypes.readLine();
}
String refSeq = refMap.get(chr);
if (refSeq == null) {
System.out.println("No reference sequence for: " + chr);
}
int numSame = 0;
for (int i = 0; i < hapSeq.length(); i++) {
if (hapSeq.charAt(i) == refSeq.charAt(i + startPos)) {
numSame++;
} else if (startPos == 173825120 && i < 1100) {
System.out.println(i + " different hapSeq: " + hapSeq.charAt(i) + " refSeq: " + refSeq.charAt(i + startPos));
}
}
double percentSame = (double) numSame / (double) hapSeq.length();
System.out.println(header + ": " + percentSame + " start position: " + startPos);
if (percentSame < 1.0) {
System.out.println(hapSeq.substring(0, 40));
System.out.println(refSeq.substring(startPos, startPos + 40));
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}