
org.snpeff.fileIterator.VcfHapMapFileIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.fileIterator;
import java.io.IOException;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Genome;
import org.snpeff.util.Gpr;
import org.snpeff.vcf.VcfEntry;
/**
* Opens a Hapmap phased file and iterates over all entries, returning VcfEntries for each line
* Note: Each HapMap file has one chromosome. The reference sequence for the chromosome is read from a fasta file
*
* @author pcingola
*/
public class VcfHapMapFileIterator extends MarkerFileIterator {
static final String format = "GT"; // VCF genotype format
protected Genome genome;
String fatsaFileName;
String seq = ""; // Reference sequence
VcfFileIterator vcfFile;
Chromosome chr;
public VcfHapMapFileIterator(String hapMapFileName, String fatsaFileName, Genome genome) {
super(hapMapFileName, 1); // Positions are one based
this.fatsaFileName = fatsaFileName;
this.genome = genome;
}
/**
* Parse a line from a hapMap file and create a VCF entry.
* @param line
* @return
*/
VcfEntry parseHapMapLine(String line) {
String recs[] = line.split("\t");
String id = recs[0];
int start = parsePosition(recs[1]);
// Get reference sequence
if ((start >= seq.length()) || (start < 0)) throw new RuntimeException("Position out of chromosome range " + start + ".\n\tLine number: " + lineNum + "\t" + line);
String ref = seq.substring(start, start + 1);
// Create ALTS
int count[] = new int[4]; // Count how many from each base
for (int i = 2; i < recs.length; i++) {
// Parse each phased entry
String alts[] = recs[i].toUpperCase().split(" ");
for (String alt : alts) {
if (alt.equals("A")) count[0]++;
else if (alt.equals("C")) count[1]++;
else if (alt.equals("G")) count[2]++;
else if (alt.equals("T")) count[3]++;
}
}
// Remove counts for REF base
if (ref.equals("A")) count[0] = 0;
else if (ref.equals("C")) count[1] = 0;
else if (ref.equals("G")) count[2] = 0;
else if (ref.equals("T")) count[3] = 0;
// Create ALT string
String altStr = "";
if (count[0] > 0) altStr += "A";
if (count[1] > 0) altStr += (altStr.isEmpty() ? "" : ",") + "C";
if (count[2] > 0) altStr += (altStr.isEmpty() ? "" : ",") + "G";
if (count[3] > 0) altStr += (altStr.isEmpty() ? "" : ",") + "T";
VcfEntry vcfEntry = null;
if (altStr.length() <= 3) {
vcfEntry = new VcfEntry(vcfFile, genome, chr.getId(), start, id, ref, altStr, 0.0, "", "", format);
// Create all genotype entries
for (int i = 2; i < recs.length; i++) {
String alts[] = recs[i].split(" ");
String gen = "";
if (alts[0].equals(alts[1])) {
// Homozygous
if (alts[0].equals(ref)) gen = "0|0";
else gen = "1|1";
} else {
// Heterozygous
if (alts[0].equals(ref)) gen = "0|1";
else gen = "1|0";
}
vcfEntry.addGenotype(gen);
}
// System.out.println(line + "\n" + vcfEntry + "\n");
} else Gpr.debug("WARNING! Skipping entry:\t" + line);
return vcfEntry;
}
@Override
protected VcfEntry readNext() {
// Read sequence from file (if needed)
if ((seq == null) || seq.isEmpty()) readSequence();
// Read another line from the file
try {
while (ready()) {
line = readLine();
if (line == null) return null; // End of file?
if (lineNum > 1) { // Ignore title line
VcfEntry vcfEntry = parseHapMapLine(line);
if (vcfEntry != null) return vcfEntry;
}
}
} catch (IOException e) {
throw new RuntimeException("Error reading file '" + fileName + "'. Line ignored:\n\tLine (" + lineNum + "):\t'" + line + "'");
}
return null;
}
/**
* Read sequence from fasta file. Only one sequence per file, so we just read the first sequence.
* WARING: Chromosome is created if it doesn't exist
*/
void readSequence() {
FastaFileIterator ffi = new FastaFileIterator(fatsaFileName);
seq = ffi.next().toUpperCase();
String chrName = ffi.getName();
// Is the chromosome avaialbe?
if (genome.getChromosome(chrName) == null) {
// Create chromosome and add it to genome
chr = new Chromosome(genome, 0, seq.length(), chrName);
genome.add(chr);
} else chr = genome.getChromosome(chrName);
ffi.close();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy