All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.biodata.tools.pedigree.ModeOfInheritance Maven / Gradle / Ivy

There is a newer version: 3.3.0
Show newest version
package org.opencb.biodata.tools.pedigree;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.opencb.biodata.models.clinical.ClinicalProperty.Penetrance;
import org.opencb.biodata.models.clinical.Disorder;
import org.opencb.biodata.models.clinical.pedigree.Member;
import org.opencb.biodata.models.clinical.pedigree.Pedigree;
import org.opencb.biodata.models.clinical.pedigree.PedigreeManager;
import org.opencb.biodata.models.pedigree.IndividualProperty;
import org.opencb.biodata.models.variant.Genotype;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;

public class ModeOfInheritance {

    public static final int GENOTYPE_0_0 = 0;
    public static final int GENOTYPE_0_1 = 1;
    public static final int GENOTYPE_1_1 = 2;

    public static final int GENOTYPE_0 = 3;
    public static final int GENOTYPE_1 =4;
    public static final String TRANSCRIPTS_LIST = "transcripts";
    public static final String PARENTAL_ORIGIN = "parentalOrigin";
    public static final String MATERNAL = "maternal";
    public static final String PATERNAL = "paternal";

    public static Set lof;
    public static Set extendedLof;
    public static Set proteinCoding;

    protected static final Comparator VARIANT_COMPARATOR = Comparator.comparing(Variant::getChromosome)
            .thenComparing(Variant::getStart)
            .thenComparing(Variant::getEnd)
            .thenComparing(Variant::getReference)
            .thenComparing(Variant::getAlternate)
            .thenComparing(Variant::toString);

    private static Logger logger;

    static {
        proteinCoding = new HashSet<>(Arrays.asList("protein_coding", "IG_C_gene", "IG_D_gene", "IG_J_gene", "IG_V_gene",
                "nonsense_mediated_decay", "non_stop_decay", "TR_C_gene", "TR_D_gene", "TR_J_gene", "TR_V_gene"));

        lof = new HashSet<>(Arrays.asList("SO:0001893", "transcript_ablation", "SO:0001574", "splice_acceptor_variant",
                "SO:0001575", "splice_donor_variant", "SO:0001587", "stop_gained", "SO:0001589", "frameshift_variant",
                "SO:0001578", "stop_lost", "SO:0002012", "start_lost", "SO:0001889", "transcript_amplification",
                "SO:0001821", "inframe_insertion", "SO:0001822", "inframe_deletion"));

        extendedLof = new HashSet<>(lof);
        extendedLof.addAll(Arrays.asList("SO:0001582", "initiator_codon_variant", "SO:0001583", "missense_variant",
                "SO:0001630", "splice_region_variant", "SO:0001626", "incomplete_terminal_codon_variant"));

        logger = LoggerFactory.getLogger(ModeOfInheritance.class.toString());
    }


    public static Map> dominant(Pedigree pedigree, Disorder disorder, Penetrance penetrance) {
        PedigreeManager pedigreeManager = new PedigreeManager(pedigree);

        // Get affected individuals for that phenotype
        Set affectedMembers = pedigreeManager.getAffectedIndividuals(disorder);

        // Get all possible genotypes for each individual
        Map> genotypes = new HashMap<>();
        for (Member member : pedigree.getMembers()) {
            genotypes.put(member.getId(), calculateDominant(affectedMembers.contains(member), penetrance));
        }

        // Validate genotypes using relationships
        validateGenotypes(genotypes, pedigreeManager);

        if (!isValidModeOfInheritance(genotypes, pedigree, affectedMembers)) {
            return emptyMapOfGenotypes(genotypes);
        }

        // Return a readable output, i.e., returning "0/0, "0/1", "1/1"
        return prepareOutput(genotypes);
    }

    public static Map> recessive(Pedigree pedigree, Disorder disorder, Penetrance penetrance) {
        PedigreeManager pedigreeManager = new PedigreeManager(pedigree);

        // Get affected individuals for that phenotype
        Set affectedMembers = pedigreeManager.getAffectedIndividuals(disorder);

        // Get all possible genotypes for each individual
        Map> genotypes = new HashMap<>();
        for (Member member : pedigree.getMembers()) {
            genotypes.put(member.getId(), calculateRecessive(affectedMembers.contains(member), penetrance));
        }

        // Validate genotypes using relationships
        validateGenotypes(genotypes, pedigreeManager);

        if (!isValidModeOfInheritance(genotypes, pedigree, affectedMembers)) {
            return emptyMapOfGenotypes(genotypes);
        }

        // Return a readable output, i.e., returning "0/0, "0/1", "1/1"
        return prepareOutput(genotypes);
    }

    public static Map> xLinked(Pedigree pedigree, Disorder disorder, boolean isDominant,
                                                    Penetrance penetrance) {
        PedigreeManager pedigreeManager = new PedigreeManager(pedigree);

        // Get affected individuals for that phenotype
        Set affectedMembers = pedigreeManager.getAffectedIndividuals(disorder);

        // Get all possible genotypes for each individual
        Map> genotypes = new HashMap<>();

        for (Member member : pedigree.getMembers()) {
            if (affectedMembers.contains(member)) {
                if (member.getSex().getSex() == IndividualProperty.Sex.MALE) {
                    Set genotype = new HashSet<>();
                    genotype.add(GENOTYPE_1);
                    genotypes.put(member.getId(), genotype);
                } else {
                    // Female
                    Set genotype = new HashSet<>();
                    if (isDominant) {
                        genotype.add(GENOTYPE_0_1);
                    }
                    genotype.add(GENOTYPE_1_1);
                    genotypes.put(member.getId(), genotype);
                }
            } else {
                if (member.getSex().getSex() == IndividualProperty.Sex.MALE) {
                    Set genotype = new HashSet<>();
                    genotype.add(GENOTYPE_0);
                    if (penetrance == Penetrance.INCOMPLETE) {
                        genotype.add(GENOTYPE_1);
                    }
                    genotypes.put(member.getId(), genotype);
                } else {
                    Set genotype = new HashSet<>();
                    genotype.add(GENOTYPE_0_0);
                    if (!isDominant || penetrance == Penetrance.INCOMPLETE) {
                        genotype.add(GENOTYPE_0_1);
                    }
                    if (penetrance == Penetrance.INCOMPLETE) {
                        genotype.add(GENOTYPE_1_1);
                    }
                    genotypes.put(member.getId(), genotype);
                }
            }
        }

        logger.debug("Genotypes before validating: {}", genotypes);

        // Validate genotypes using relationships
        validateGenotypes(genotypes, pedigreeManager);

        logger.debug("Genotypes after validating: {}", genotypes);

        if (!isValidModeOfInheritance(genotypes, pedigree, affectedMembers)) {
            return emptyMapOfGenotypes(genotypes);
        }

        // Return a readable output
        return prepareOutput(genotypes);
    }

    public static Map> yLinked(Pedigree pedigree, Disorder disorder, Penetrance penetrance) {
        PedigreeManager pedigreeManager = new PedigreeManager(pedigree);

        // Get affected individuals for that phenotype
        Set affectedMembers = pedigreeManager.getAffectedIndividuals(disorder);

        // Get all possible genotypes for each individual
        Map> genotypes = new HashMap<>();

        for (Member member : pedigree.getMembers()) {
            if (affectedMembers.contains(member)) {
                if (member.getSex().getSex() == IndividualProperty.Sex.MALE) {
                    Set genotype = new HashSet<>();
                    genotype.add(GENOTYPE_1);
                    genotypes.put(member.getId(), genotype);
                } else {
                    // Found affected female!!??

                    // We fill the rest of the genotypes map
                    for (Member pedigreeMember : pedigree.getMembers()) {
                        genotypes.put(pedigreeMember.getId(), Collections.emptySet());
                    }
                    return emptyMapOfGenotypes(genotypes);
                }
            } else {
                if (member.getSex().getSex() == IndividualProperty.Sex.MALE) {
                    Set genotype = new HashSet<>();
                    genotype.add(GENOTYPE_0);
                    if (penetrance == Penetrance.INCOMPLETE) {
                        genotype.add(GENOTYPE_1);
                    }
                    genotypes.put(member.getId(), genotype);
                } else {
                    genotypes.put(member.getId(), new HashSet<>());
                }
            }
        }

        // Check for impossible situations
        Queue queue = new LinkedList<>(pedigreeManager.getWithoutChildren());
        while (!queue.isEmpty()) {
            Member child = queue.remove();

            if (child.getSex().getSex() == IndividualProperty.Sex.MALE && child.getFather() != null && StringUtils.isNotEmpty(child.getFather().getId())) {
                // Both or none of them should be affected
                Set childGenotypes = genotypes.get(child.getId());
                Set fatherGenotypes = genotypes.get(child.getFather().getId());

                if (!childGenotypes.containsAll(fatherGenotypes)) {
                    // Father and son have different genotypes, which shouldn't be possible
                    return emptyMapOfGenotypes(genotypes);
                }
            }

            if (child.getFather() != null && StringUtils.isNotEmpty(child.getFather().getId())) {
                queue.add(child.getFather());
            }
            if (child.getMother() != null && StringUtils.isNotEmpty(child.getMother().getId())) {
                queue.add(child.getMother());
            }
        }

        // Return a readable output, i.e., returning "-", "0", "1"
        return prepareOutput(genotypes);
    }

    public static Map> mitochondrial(Pedigree pedigree, Disorder disorder, Penetrance penetrance) {
        PedigreeManager pedigreeManager = new PedigreeManager(pedigree);

        // Get affected individual ids for that phenotype
        Set affectedMembers = pedigreeManager.getAffectedIndividuals(disorder).stream().map(Member::getId)
                .collect(Collectors.toSet());

        Map> genotypes = new HashMap<>();

        if (pedigree.getProband() != null) {
            Set genotype = new HashSet<>();
            if (affectedMembers.contains(pedigree.getProband().getId())) {
                genotype.add(GENOTYPE_1);
            } else {
                genotype.add(GENOTYPE_0);
            }
            genotypes.put(pedigree.getProband().getId(), genotype);

            if (pedigree.getProband().getMother() != null) {
                genotype = new HashSet<>();
                if (affectedMembers.contains(pedigree.getProband().getMother().getId())) {
                    genotype.add(GENOTYPE_1);
                } else {
                    genotype.add(GENOTYPE_0);
                    if (penetrance == Penetrance.INCOMPLETE) {
                        genotype.add(GENOTYPE_1);
                    }
                }
                genotypes.put(pedigree.getProband().getMother().getId(), genotype);
            }

        }

        // Return a readable output
        return prepareOutput(genotypes);
    }

    public static Map> compoundHeterozygous(Pedigree pedigree) {
        Map> genotypes = new HashMap<>();

        if (pedigree.getProband() != null) {
            genotypes.put(pedigree.getProband().getId(), new HashSet<>(Collections.singletonList(GENOTYPE_0_1)));
            if (pedigree.getProband().getFather() != null) {
                genotypes.put(pedigree.getProband().getFather().getId(), new HashSet<>(Arrays.asList(GENOTYPE_0_0, GENOTYPE_0_1)));
            }
            if (pedigree.getProband().getMother() != null) {
                genotypes.put(pedigree.getProband().getMother().getId(), new HashSet<>(Arrays.asList(GENOTYPE_0_0, GENOTYPE_0_1)));
            }
        }

        // Return a readable output
        return prepareOutput(genotypes);
    }

    /**
     * Return a truly compound heterozygous variants grouped by transcript.
     *
     * @param iterator  Variant iterator
     * @param probandSampleIdx  Proband sample index
     * @param motherSampleIdx  Mother sample index
     * @param fatherSampleIdx  Father sample index
     * @return Map of transcript - variant list
     */
    public static Map> compoundHeterozygous(Iterator iterator, int probandSampleIdx, int motherSampleIdx,
                                                                  int fatherSampleIdx) {
        return compoundHeterozygousPrivate(iterator, probandSampleIdx, motherSampleIdx, fatherSampleIdx, Integer.MAX_VALUE, extendedLof, proteinCoding).getRight();
    }

    /**
     * Return a truly compound heterozygous variants grouped by transcript.
     *
     * @param iterator         Variant iterator
     * @param probandSampleIdx Proband sample index
     * @param motherSampleIdx  Mother sample index
     * @param fatherSampleIdx  Father sample index
     * @param limit            limit number of different variants
     * @return Map of transcript - variant list
     */
    public static List compoundHeterozygous(Iterator iterator,
                                                     int probandSampleIdx, int motherSampleIdx, int fatherSampleIdx, int limit) {
        return compoundHeterozygous(iterator, probandSampleIdx, motherSampleIdx, fatherSampleIdx, limit, extendedLof, proteinCoding);
    }

    /**
     * Return a truly compound heterozygous variants grouped by transcript.
     *
     * @param iterator         Variant iterator
     * @param probandSampleIdx Proband sample index
     * @param motherSampleIdx  Mother sample index
     * @param fatherSampleIdx  Father sample index
     * @param limit            limit number of different variants
     * @param acceptedSoTerm   Accepted SequenceOntologyTerm
     * @param acceptedBiotypes Accepted Biotypes
     * @return Map of transcript - variant list
     */
    public static List compoundHeterozygous(Iterator iterator, int probandSampleIdx, int motherSampleIdx,
                                                     int fatherSampleIdx, int limit, Set acceptedSoTerm, Set acceptedBiotypes) {
        if (limit <= 0) {
            limit = Integer.MAX_VALUE;
        }
        List variants = new ArrayList<>(compoundHeterozygousPrivate(
                iterator, probandSampleIdx, motherSampleIdx, fatherSampleIdx, limit, acceptedSoTerm, acceptedBiotypes).getLeft());
        if (variants.size() > limit) {
            variants = variants.subList(0, limit);
        }
        return variants;
    }

    private static Pair, Map>> compoundHeterozygousPrivate(
            Iterator iterator, int probandSampleIdx, int motherSampleIdx, int fatherSampleIdx,
            int limit, Set acceptedSoTerm, Set acceptedBiotypes) {
        Set variants = new TreeSet<>(VARIANT_COMPARATOR);
        int variantsRetrieved = 0;

        // Map: transcript to pair (pair-left for mother and pair-right for father)
        Map, List>> transcriptToVariantsMap = new HashMap<>();

        if (motherSampleIdx < 0 && fatherSampleIdx < 0) {
            logger.error("Missing mother and father computing CompoundHet");
            return Pair.of(Collections.emptySet(), Collections.emptyMap());
        }

        String motherGenotype;
        String fatherGenotype;

        while (iterator.hasNext() && variants.size() < limit) {
            Variant variant = iterator.next();
            logger.debug("Variant: '{}'", variant.toStringSimple());

            variantsRetrieved += 1;

            StudyEntry studyEntry = variant.getStudies().get(0);
            int gtIdx = studyEntry.getSampleDataKeys().indexOf("GT");

            String probandGenotype = studyEntry.getSampleData(probandSampleIdx).get(gtIdx);
            if (motherSampleIdx < 0) {
                // Missing mother
                fatherGenotype = studyEntry.getSampleData(fatherSampleIdx).get(gtIdx);
                motherGenotype = getComplementaryCHGenotype(fatherGenotype);
            } else if (fatherSampleIdx < 0) {
                // Missing father
                motherGenotype = studyEntry.getSampleData(motherSampleIdx).get(gtIdx);
                fatherGenotype = getComplementaryCHGenotype(motherGenotype);
            } else {
                motherGenotype = studyEntry.getSampleData(motherSampleIdx).get(gtIdx);
                fatherGenotype = studyEntry.getSampleData(fatherSampleIdx).get(gtIdx);
            }

            if (!probandGenotype.contains("0") || !probandGenotype.contains("1")) {
                logger.debug("Skipping variant '{}'. The proband is '{}' and not 0/1", variant, probandGenotype);
                continue;
            }

            if ((fatherGenotype.contains("1") && motherGenotype.contains("1"))
                    || (!fatherGenotype.contains("1") && !motherGenotype.contains("1"))) {
                logger.debug("Skipping variant '{}'. The parents are both 0/0 or 0/1", variant);
                continue;
            }

            int pairIndex;
            if (fatherGenotype.contains("1") && !motherGenotype.contains("1")) {
                pairIndex = 0;
            } else if (motherGenotype.contains("1") && !fatherGenotype.contains("1")) {
                pairIndex = 1;
            } else {
                logger.warn("This should never happen!!!");
                continue;
            }

            for (ConsequenceType consequenceType : variant.getAnnotation().getConsequenceTypes()) {
                String transcriptId = consequenceType.getEnsemblTranscriptId();
                if (StringUtils.isNotEmpty(transcriptId)
                        && (acceptedSoTerm.isEmpty() || acceptedBiotypes.contains(consequenceType.getBiotype()))) {
                    if (CollectionUtils.isNotEmpty(consequenceType.getSequenceOntologyTerms())) {
                        for (SequenceOntologyTerm soTerm : consequenceType.getSequenceOntologyTerms()) {
                            if (acceptedSoTerm.isEmpty()
                                    || acceptedSoTerm.contains(soTerm.getName())
                                    || acceptedSoTerm.contains(soTerm.getAccession())) {
                                Pair, List> pair = transcriptToVariantsMap
                                        .computeIfAbsent(transcriptId, k -> Pair.of(new ArrayList<>(), new ArrayList<>()));
                                if (pairIndex == 0) {
                                    // From mother
                                    addParentVariant(variant, pair.getLeft(), pair.getRight(), variants);
                                } else {
                                    // From father
                                    addParentVariant(variant, pair.getRight(), pair.getLeft(), variants);
                                }
                            }
                        }
                    }
                }
            }
        }

        Map> variantMap = new HashMap<>();
        int totalVariants = 0;
        for (Map.Entry, List>> entry : transcriptToVariantsMap.entrySet()) {
            if (entry.getValue().getLeft().size() > 0 && entry.getValue().getRight().size() > 0) {
                List transcriptVariants = new ArrayList<>(entry.getValue().getLeft().size() + entry.getValue().getRight().size());
                for (Variant variant : entry.getValue().getLeft()) {
                    // From mother
                    addIssueEntry(variant, probandSampleIdx, entry.getKey(), MATERNAL);
                    transcriptVariants.add(variant);
                }
                for (Variant variant : entry.getValue().getRight()) {
                    // From father
                    addIssueEntry(variant, probandSampleIdx, entry.getKey(), PATERNAL);
                    transcriptVariants.add(variant);
                }
                variantMap.put(entry.getKey(), transcriptVariants);
                totalVariants += variantMap.get(entry.getKey()).size();
            }
        }

//        ObjectMapper objectMapper = new ObjectMapper();
//        objectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
//        objectMapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false);
//        objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
//        objectMapper.configure(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES, false);
//        objectMapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
//
//        try {
//            logger.debug("TranscriptToVariantsMap: {}", objectMapper.writer().writeValueAsString(transcriptToVariantsMap));
//            logger.debug("VariantMap: {}", objectMapper.writer().writeValueAsString(variantMap));
//        } catch (Exception e) {
//            logger.error("{}", e.getMessage());
//        }

        logger.debug("CH - Number of variants retrieved: {}; Found {} CH variants in {} transcripts",
                variantsRetrieved, totalVariants, variantMap.size());

        // Return
        return Pair.of(variants, variantMap);
    }

    private static void addIssueEntry(Variant variant, int probandSampleIdx, String transcript, String parentalOrigin) {
        StudyEntry studyEntry = variant.getStudies().get(0);
        IssueEntry issueEntry = null;
        if (studyEntry.getIssues() == null) {
            studyEntry.setIssues(new ArrayList<>(1));
        } else {
            for (IssueEntry issue : studyEntry.getIssues()) {
                if (issue.getType().equals(IssueType.COMPOUND_HETEROZYGOUS)) {
                    issueEntry = issue;
                    break;
                }
            }
        }
        if (issueEntry == null) {
            SampleEntry sampleEntry = studyEntry.getSample(probandSampleIdx);
            issueEntry = new IssueEntry(IssueType.COMPOUND_HETEROZYGOUS,
                    new SampleEntry(sampleEntry.getSampleId(), sampleEntry.getFileIndex(), null), new HashMap<>());
            studyEntry.getIssues().add(issueEntry);
        }
        issueEntry.getData().compute(TRANSCRIPTS_LIST, (k, transcripts) -> {
            return transcripts == null ? transcript : (transcripts + "," + transcript);
        });
        issueEntry.getData().put(PARENTAL_ORIGIN, parentalOrigin);
    }

    private static void addParentVariant(Variant variant, List currentParentList, List otherParentList, Set variants) {
        currentParentList.add(variant);
        if (!otherParentList.isEmpty()) {
            variants.add(variant); // Add current variant to totalVariants count
            if (currentParentList.size() == 1) { // current variants was the first in the group, sum all other
                // variants
                variants.addAll(otherParentList);
            }
        }
    }

    private static String getComplementaryCHGenotype(String parentGenotype) {
        String otherParentGenotype = "1/1";
        if (parentGenotype.contains("0") && parentGenotype.contains("1")) {
            otherParentGenotype = "0/0";
        } else if (parentGenotype.contains("0") && !parentGenotype.contains("1")) {
            otherParentGenotype = "0/1";
        }

        return otherParentGenotype;
    }

    @Deprecated
    public static List compoundHeterozygosity(Pedigree pedigree, Iterator variantIterator) throws Exception {
        Member child = pedigree.getProband();

        if (child == null || StringUtils.isEmpty(child.getId())) {
            throw new Exception("Missing proband in pedigree");
        }

        if (child.getFather() == null) {
            throw new Exception("Missing father for " + child.getId());
        }
        if (child.getMother() == null) {
            throw new Exception("Missing mother for " + child.getId());
        }

        Member father = child.getFather();
        Member mother = child.getMother();

        // Here we will put all the variant ids that would match parents (0/0 0/1) -> child (0/1)
        List fatherExplainedVariantList = new ArrayList<>();
        List motherExplainedVariantList = new ArrayList<>();

        while (variantIterator.hasNext()) {
            Variant variant = variantIterator.next();

            // We assume the variant iterator will always contain information for one study
            StudyEntry study = variant.getStudies().get(0);

            switch (compoundHeterozygosityVariantExplainType(
                    study.getSampleData(child.getId(), "GT"),
                    study.getSampleData(father.getId(), "GT"),
                    study.getSampleData(mother.getId(), "GT"))) {
                case 1:
                    fatherExplainedVariantList.add(variant);
                    break;
                case 2:
                    motherExplainedVariantList.add(variant);
                    break;
            }

        }

        if (!fatherExplainedVariantList.isEmpty() && !motherExplainedVariantList.isEmpty()) {
            List variantList = new ArrayList<>(fatherExplainedVariantList.size() + motherExplainedVariantList.size());
            variantList.addAll(fatherExplainedVariantList);
            variantList.addAll(motherExplainedVariantList);
            return variantList;
        }

        // No variants support the model
        return Collections.emptyList();
    }

    @Deprecated
    public static int compoundHeterozygosityVariantExplainType(String childGtStr, String fatherGtStr, String motherGtStr) {
        Genotype childGt = new Genotype(childGtStr);

        // Child is 0/1 or 0|1
        if (childGt.getAllelesIdx().length == 2 && ((childGt.getAllelesIdx()[0] == 0 && childGt.getAllelesIdx()[1] == 1)
                || (childGt.getAllelesIdx()[0] == 1 && childGt.getAllelesIdx()[1] == 0))) {
            Genotype fatherGt = new Genotype(fatherGtStr);
            if (fatherGt.getAllelesIdx().length == 2) {
                if (fatherGt.getAllelesIdx()[0] == 0 && fatherGt.getAllelesIdx()[1] == 0) {
                    Genotype motherGt = new Genotype(motherGtStr);
                    if (motherGt.getAllelesIdx().length == 2
                            && ((motherGt.getAllelesIdx()[0] == 0 && motherGt.getAllelesIdx()[1] == 1)
                            || (motherGt.getAllelesIdx()[0] == 1 && motherGt.getAllelesIdx()[1] == 0))) {
                        // Mother explained variant
                        return 2;
                    }
                } else if ((fatherGt.getAllelesIdx()[0] == 0 && fatherGt.getAllelesIdx()[1] == 1)
                        || (fatherGt.getAllelesIdx()[0] == 1 && fatherGt.getAllelesIdx()[1] == 0)) {
                    Genotype motherGt = new Genotype(motherGtStr);
                    if (motherGt.getAllelesIdx().length == 2 && motherGt.getAllelesIdx()[0] == 0 && motherGt.getAllelesIdx()[1] == 0) {
                        // Father explained variant
                        return 1;
                    }
                }
            }
        }
        return 0;
    }

    public static Map> deNovo(Pedigree pedigree) {
        Map> genotypes = new HashMap<>();

        if (pedigree.getProband() != null) {
            if (pedigree.getProband().getFather() == null || pedigree.getProband().getMother() == null) {
                genotypes.put(pedigree.getProband().getId(), new HashSet<>(Arrays.asList(GENOTYPE_1_1, GENOTYPE_1)));
            } else {
                genotypes.put(pedigree.getProband().getId(), new HashSet<>(Arrays.asList(GENOTYPE_0_1, GENOTYPE_1_1, GENOTYPE_1)));
            }
        }

        // Return a readable output
        return prepareOutput(genotypes);
    }

    public static List deNovo(Iterator iterator, int probandSampleIdx, int motherSampleIdx, int fatherSampleIdx) {
        List variants = new ArrayList<>();

        Genotype motherGenotype;
        Genotype fatherGenotype;

        int variantsRetrieved = 0;
        while (iterator.hasNext()) {
            Variant variant = iterator.next();
            variantsRetrieved += 1;

            StudyEntry studyEntry = variant.getStudies().get(0);
            int gtIdx = studyEntry.getSampleDataKeys().indexOf("GT");

            Genotype probandGenotype = new Genotype(studyEntry.getSampleData(probandSampleIdx).get(gtIdx));

            if (motherSampleIdx < 0 && fatherSampleIdx >= 0) {
                fatherGenotype = new Genotype(studyEntry.getSampleData(fatherSampleIdx).get(gtIdx));
                motherGenotype = null;
            } else if (fatherSampleIdx < 0 && motherSampleIdx >= 0) {
                motherGenotype = new Genotype(studyEntry.getSampleData(motherSampleIdx).get(gtIdx));
                fatherGenotype = null;
            } else {
                motherGenotype = new Genotype(studyEntry.getSampleData(motherSampleIdx).get(gtIdx));
                fatherGenotype = new Genotype(studyEntry.getSampleData(fatherSampleIdx).get(gtIdx));
            }

            if (MendelianError.isDeNovo(fatherGenotype, motherGenotype, probandGenotype, variant.getChromosome())) {
                variants.add(variant);
            }
        }

        logger.debug("De novo - Number of variants retrieved: {}; Number of de novo variants: {}", variantsRetrieved, variants.size());

        // Return
        return variants;
    }

    private static boolean isValidModeOfInheritance(Map> genotypes, Pedigree pedigree,
                                                    Set affectedMembers) {
        for (Member member : pedigree.getMembers()) {
            if (member.getMother() != null && member.getFather() != null) {
                Set childGenotypes = genotypes.get(member.getId());
                Set motherGenotypes = genotypes.get(member.getMother().getId());
                Set fatherGenotypes = genotypes.get(member.getFather().getId());

                if (childGenotypes.isEmpty()) {
                    return false;
                }

                if (childGenotypes.size() == 1 && childGenotypes.contains(GENOTYPE_0_0)) {
                    if (affectedMembers.contains(member)) {
                        return false;
                    }

                    if (motherGenotypes.size() == 1 && motherGenotypes.contains(GENOTYPE_1_1) && fatherGenotypes.size() == 1
                            && fatherGenotypes.contains(GENOTYPE_1_1)) {
                        return false;
                    }
                } else if (childGenotypes.size() == 1 && childGenotypes.contains(GENOTYPE_1_1)) {
                    if (!affectedMembers.contains(member)) {
                        return false;
                    }

                    if (motherGenotypes.size() == 1 && motherGenotypes.contains(GENOTYPE_0_0) && fatherGenotypes.size() == 1
                            && fatherGenotypes.contains(GENOTYPE_0_0)) {
                        return false;
                    }
                }
            }
        }

        return true;
    }

    public static boolean isEmptyMapOfGenotypes(Map> genotypes) {
        return MapUtils.isEmpty(genotypes) || genotypes.entrySet().stream()
                .filter(entry -> org.opencb.commons.utils.ListUtils.isNotEmpty(entry.getValue()))
                .map(Map.Entry::getKey)
                .collect(Collectors.toList()).isEmpty();
    }

    private static Set calculateDominant(boolean affected, Penetrance penetrance) {
        Set gt = new HashSet<>();
        if (affected) {
            gt.add(GENOTYPE_0_1);
            gt.add(GENOTYPE_1_1);
        } else {
            gt.add(GENOTYPE_0_0);
            if (penetrance == Penetrance.INCOMPLETE) {
                gt.add(GENOTYPE_0_1);
                gt.add(GENOTYPE_1_1);
            }
        }
        return gt;
    }

    private static Set calculateRecessive(boolean affected, Penetrance penetrance) {
        Set gt = new HashSet<>();
        if (affected) {
            gt.add(GENOTYPE_1_1);
        } else {
            gt.add(GENOTYPE_0_0);
            gt.add(GENOTYPE_0_1);
            if (penetrance == Penetrance.INCOMPLETE) {
                gt.add(GENOTYPE_1_1);
            }
        }
        return gt;
    }

    /**
     * Validate and removes and genotypes that does not make sense given the parent - child relation.
     * This method should only be called under dominant, recessive and x-linked modes of inheritance. It does not support y-linked modes
     * where the mother does not have a possible genotype.
     *
     * @param gt              Map of individual id - set of possible genotypes.
     * @param pedigreeManager Pedigree manager.
     */
    private static void validateGenotypes(Map> gt, PedigreeManager pedigreeManager) {
        List withoutChildren = pedigreeManager.getWithoutChildren();

        Queue queue = new LinkedList<>();

        for (Member member : withoutChildren) {
            queue.add(member.getId());
        }

        while (!queue.isEmpty()) {
            String individualId = queue.remove();
            Member member = pedigreeManager.getIndividualMap().get(individualId);
            processIndividual(member, gt);

            if (member.getFather() != null) {
                if (!queue.contains(member.getFather().getId())) {
                    queue.add(member.getFather().getId());
                }
            }

            if (member.getMother() != null) {
                if (!queue.contains(member.getMother().getId())) {
                    queue.add(member.getMother().getId());
                }
            }
        }
    }

    private static void processIndividual(Member member, Map> gt) {
        // 1. We first process them independently so the possible genotypes are reduced

        // From father to child
        if (member.getFather() != null) {
            gt.put(member.getId(), validate(gt.get(member.getFather().getId()), gt.get(member.getId())));
        }

        // From mother to child
        if (member.getMother() != null) {
            gt.put(member.getId(), validate(gt.get(member.getMother().getId()), gt.get(member.getId())));
        }

        // From child to father
        if (member.getFather() != null) {
            gt.put(member.getFather().getId(), validate(gt.get(member.getId()), gt.get(member.getFather().getId())));
        }

        // From child to mother
        if (member.getMother() != null) {
            gt.put(member.getMother().getId(), validate(gt.get(member.getId()), gt.get(member.getMother().getId())));
        }

        // 2. We now perform a comparison having a look at both parents
        if (member.getMother() != null && member.getFather() != null) {
            Set fatherGenotypes = gt.get(member.getFather().getId());
            Set motherGenotypes = gt.get(member.getMother().getId());
            Set childGenotypes = gt.get(member.getId());

            Set finalGenotypes = new HashSet<>();
            for (int childGenotype : childGenotypes) {
                if (childGenotype == GENOTYPE_0_0) {
                    if ((fatherGenotypes.contains(GENOTYPE_0_0) || fatherGenotypes.contains(GENOTYPE_0_1)
                            || fatherGenotypes.contains(GENOTYPE_0))
                            && (motherGenotypes.contains(GENOTYPE_0_0) || motherGenotypes.contains(GENOTYPE_0_1))) {
                        finalGenotypes.add(GENOTYPE_0_0);
                    }
                } else if (childGenotype == GENOTYPE_0_1) {
                    if (((fatherGenotypes.contains(GENOTYPE_0_0) || fatherGenotypes.contains(GENOTYPE_0_1)
                            || fatherGenotypes.contains(GENOTYPE_0))
                            && (motherGenotypes.contains(GENOTYPE_0_1) || motherGenotypes.contains(GENOTYPE_1_1)))
                            || ((motherGenotypes.contains(GENOTYPE_0_0) || motherGenotypes.contains(GENOTYPE_0_1))
                            && (fatherGenotypes.contains(GENOTYPE_0_1) || fatherGenotypes.contains(GENOTYPE_1_1)
                            || fatherGenotypes.contains(GENOTYPE_1)))) {
                        finalGenotypes.add(GENOTYPE_0_1);
                    }
                } else if (childGenotype == GENOTYPE_1_1) {
                    if ((fatherGenotypes.contains(GENOTYPE_0_1) || fatherGenotypes.contains(GENOTYPE_1_1)
                            || fatherGenotypes.contains(GENOTYPE_1))
                            && (motherGenotypes.contains(GENOTYPE_0_1) || motherGenotypes.contains(GENOTYPE_1_1))) {
                        finalGenotypes.add(GENOTYPE_1_1);
                    }
                }  else {
                    finalGenotypes.add(childGenotype);
                }
            }

            gt.put(member.getId(), finalGenotypes);
        }
    }

    private static void processIndividualCopy(Member member, Map> gt) {
        // From father to child
        if (member.getFather() != null) {
            gt.put(member.getId(), validate(gt.get(member.getFather().getId()), gt.get(member.getId())));
        }

        // From mother to child
        if (member.getMother() != null) {
            gt.put(member.getId(), validate(gt.get(member.getMother().getId()), gt.get(member.getId())));
        }

        // From child to father
        if (member.getFather() != null) {
            gt.put(member.getFather().getId(), validate(gt.get(member.getId()), gt.get(member.getFather().getId())));
        }

        // From child to mother
        if (member.getMother() != null) {
            gt.put(member.getMother().getId(), validate(gt.get(member.getId()), gt.get(member.getMother().getId())));
        }
    }

    private static Set validate(Set from, Set to) {
        Set validGt = new HashSet<>();
        for (int gtFrom : from) {
            for (int gtTo : to) {
                if (gtFrom == GENOTYPE_0_0) {
                    // 0/0 in parent should be...
                    if (gtTo == GENOTYPE_0_0 || gtTo == GENOTYPE_0_1) {
                        validGt.add(gtTo);
                    }
                    // This case should only happen if the gtFrom is from the mother and the gtTo from her son (x-linked)
                    if (gtTo == GENOTYPE_0) {
                        validGt.add(gtTo);
                    }
                } else if (gtFrom == GENOTYPE_1_1) {
                    // 1/1 in parent should be 0/1 or 1/1 in child
                    if (gtTo == GENOTYPE_0_1 || gtTo == GENOTYPE_1_1) {
                        validGt.add(gtTo);
                    }
                    // This case should only happen if the gtFrom is from the mother and the gtTo from her son (x-linked)
                    if (gtTo == GENOTYPE_1) {
                        validGt.add(gtTo);
                    }
                } else if (gtFrom == GENOTYPE_0_1) {
                    // 0/1 in parent can be whatever in child
                    validGt.add(gtTo);
                } else if (gtFrom == GENOTYPE_0) {
                    // Comparison of son (gtFrom) with mother (gtTo)
                    if (gtTo == GENOTYPE_0_0 || gtTo == GENOTYPE_0_1) {
                        validGt.add(gtTo);
                    }
                    // Comparison of son (gtFrom) with father (gtTo)
                    if (gtTo == GENOTYPE_0 || gtTo == GENOTYPE_1) {
                        // The father is not affected by the x-linked genotype
                        validGt.add(gtTo);
                    }
                } else if (gtFrom == GENOTYPE_1) {
                    // Comparison of son (gtFrom) with mother (gtTo)
                    if (gtTo == GENOTYPE_0_1 || gtTo == GENOTYPE_1_1) {
                        validGt.add(gtTo);
                    }
                    // Comparison of son (gtFrom) with father (gtTo)
                    if (gtTo == GENOTYPE_0 || gtTo == GENOTYPE_1) {
                        // The father is not affected by the x-linked genotype
                        validGt.add(gtTo);
                    }
                }
            }
        }
        return validGt;
    }

    private static Map> emptyMapOfGenotypes(Map> genotypes) {
        Map> output = new HashMap<>();
        for (String key : genotypes.keySet()) {
            output.put(key, Collections.emptyList());
        }
        return output;
    }

    private static Map> prepareOutput(Map> genotypes) {
        Map> output = new HashMap<>();
        for (String key : genotypes.keySet()) {
            Set gtList = new HashSet<>();
            Iterator it = genotypes.get(key).iterator();
            while (it.hasNext()) {
                gtList.addAll(toGenotypeString(it.next()));
            }
            output.put(key, new ArrayList<>(gtList));
        }

        logger.debug("Final map of genotypes: {}", output);
        return output;
    }

    public static List toGenotypeString(int gt) {
        switch (gt) {
            case GENOTYPE_0_0:
                return Arrays.asList("0/0", "0|0");
            case GENOTYPE_0_1:
                return Arrays.asList("0/1", "0|1", "1|0");
            case GENOTYPE_1_1:
                return Arrays.asList("1/1", "1|1");
            case GENOTYPE_0:
                return Arrays.asList("0", "0/0", "0|0");
            case GENOTYPE_1:
                return Arrays.asList("1", "1/1", "1|1", "0/1", "0|1", "1|0");
            default:
                return Collections.singletonList("-");
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy