All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.snpeff.vcf.Pedigree Maven / Gradle / Ivy

The newest version!
package org.snpeff.vcf;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;
import org.snpeff.util.Tuple;

/**
 * A pedigree for cancer samples
 */
public class Pedigree implements Iterable {

	boolean debug;
	boolean verbose;
	List pedigree;
	List pedigreeDerived;
	int[] pedigreeDerivedArray;
	VcfFileIterator vcfFile;

	public Pedigree(VcfFileIterator vcfFile) {
		this.vcfFile = vcfFile;
		readPedigreeVcf();
	}

	public Pedigree(VcfFileIterator vcfFile, String cancerSamples) {
		this.vcfFile = vcfFile;
		readPedigreeTxt(cancerSamples);
	}

	/**
	 * Are there any "back to reference" cancer variant
	 */
	public boolean anyBackToRef(VcfEntry vcfEntry) {
		for (PedigreeEntry pe : derived()) {
			int numOri = pe.getOriginalNum();
			int numDer = pe.getDerivedNum();
			VcfGenotype gtOri = vcfEntry.getVcfGenotype(numOri);
			VcfGenotype gtDer = vcfEntry.getVcfGenotype(numDer);

			int gd[] = gtDer.getGenotype(); // Derived genotype
			int go[] = gtOri.getGenotype(); // Original genotype

			// Skip if one of the genotypes is missing
			if (gd == null || go == null) continue;

			if (gtOri.isPhased() && gtDer.isPhased()) {
				// Phased, we only have two possible comparisons
				for (int i = 0; i < 2; i++)
					if ((go[i] > 0) && (gd[i] == 0)) return true;
			} else {
				// Not phased, compare all possible combinations
				for (int o = 0; o < go.length; o++)
					for (int d = 0; d < gd.length; d++)
						if ((go[o] > 0) && (gd[d] == 0)) return true;
			}
		}

		return false;
	}

	/**
	 * Is there any derived sample?
	 */
	public boolean anyDerived() {
		return pedigree.stream().anyMatch(pe -> pe.isDerived());
	}

	/**
	 * Analyze which comparisons to make in cancer genomes
	 */
	public Set> compareCancerGenotypes(VcfEntry vcfEntry) {
		HashSet> comparisons = new HashSet<>();

		// Find out which comparisons have to be analyzed
		for (PedigreeEntry pe : derived()) {
			int numOri = pe.getOriginalNum();
			int numDer = pe.getDerivedNum();
			VcfGenotype gtOri = vcfEntry.getVcfGenotype(numOri);
			VcfGenotype gtDer = vcfEntry.getVcfGenotype(numDer);

			int gd[] = gtDer.getGenotype(); // Derived genotype
			int go[] = gtOri.getGenotype(); // Original genotype

			// Skip if one of the genotypes is missing
			if (gd == null || go == null) continue;

			if (gtOri.isPhased() && gtDer.isPhased()) {
				// Phased, we only have two possible comparisons
				for (int i = 0; i < 2; i++) {
					// Add comparisons
					if ((go[i] >= 0) && (gd[i] >= 0) // Both genotypes are non-missing?
							&& (go[i] != 0) // Origin genotype is non-reference? The case "REF -> ALT" is always analyzed in the default (non-cancer) annotation, so we skip it here
							&& (gd[i] != go[i]) // Both genotypes are different?
					) {
						Tuple compare = new Tuple<>(gd[i], go[i]);
						comparisons.add(compare);
					}
				}
			} else {
				// Not phased, compare all possible combinations
				for (int o = 0; o < go.length; o++) {
					// Origin genotype is non-missing and non-reference?
					// The case "REF -> ALT" is always analyzed in the default (non-cancer) annotation, so we skip it here
					if (go[o] > 0) {
						for (int d = 0; d < gd.length; d++) {
							// Add comparisons
							if ((gd[d] >= 0) // Derived genotype is non-missing?
									&& (gd[d] != go[o]) // Derived genotype differs from original genotype?
							) {
								Tuple compare = new Tuple<>(gd[d], go[o]);
								comparisons.add(compare);
							}
						}
					}
				}
			}
		}

		return comparisons;
	}

	/**
	 * A list of all derived pedigree entries
	 */
	public List derived() {
		if (pedigreeDerived == null) pedigreeDerived = pedigree.stream().filter(pe -> pe.isDerived()).collect(Collectors.toList());
		return pedigreeDerived;
	}

	@Override
	public Iterator iterator() {
		return pedigree.iterator();
	}

	/**
	 * Read pedigree from TXT file
	 */
	void readPedigreeTxt(String cancerSamples) {
		// Read from TXT file
		if (verbose) Timer.showStdErr("Reading cancer samples pedigree from file '" + cancerSamples + "'.");

		List sampleNames = vcfFile.getVcfHeader().getSampleNames();
		pedigree = new ArrayList<>();

		for (String line : Gpr.readFile(cancerSamples).split("\n")) {
			String recs[] = line.split("\\s", -1);
			String original = recs[0];
			String derived = recs[1];

			PedigreeEntry pe = new PedigreeEntry(original, derived);
			pe.sampleNumbers(sampleNames);

			pedigree.add(pe);
		}
	}

	/**
	 * Read pedigree from VCF file's header
	 */
	void readPedigreeVcf() {
		// Read from VCF header
		if (verbose) Timer.showStdErr("Reading cancer samples pedigree from VCF header.");
		pedigree = vcfFile.getVcfHeader().getPedigree();
		if (verbose) Timer.showStdErr("Pedigree: " + pedigree);
	}

	public void setDebug(boolean debug) {
		this.debug = debug;
	}

	public void setVerbose(boolean verbose) {
		this.verbose = verbose;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy