All downloads are free. The search and download features use the official Maven repository.

org.campagnelab.dl.somatic.intermediaries.OneSampleCanonicalSimulationStrategy Maven / Gradle / Ivy

package org.campagnelab.dl.somatic.intermediaries;

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.util.XorShift1024StarRandom;
import org.campagnelab.dl.varanalysis.protobuf.BaseInformationRecords;

import java.util.Collections;

/**
 * A strategy that only considers the germline sample to determine if the site is canonical.
 * <p>
 * The germline genotype counts are ranked from largest to smallest, and the number of top-ranked
 * genotypes needed to exceed {@code canonThreshold * totalCount} is computed. When more than two
 * genotypes are needed, the request to make the record somatic is cancelled. The actual mutation
 * is always delegated to a {@link FirstSimulationStrategy}.
 * <p>
 * Instances reuse internal scratch lists between calls to
 * {@link #mutate(boolean, BaseInformationRecords.BaseInformation, BaseInformationRecords.SampleInfo,
 * BaseInformationRecords.SampleInfo, SimulationCharacteristics)}.
 * <p>
 * Created by fac2003 on 7/19/16.
 */
public class OneSampleCanonicalSimulationStrategy implements SimulationStrategy {
    /** Re-created on every {@link #setSeed(long)}; not read anywhere else in this class. */
    private XorShift1024StarRandom randomGenerator;
    /** Fraction of the total germline count that the top-ranked genotypes must exceed. */
    private double canonThreshold;

    /** Delegate that performs the mutation; created in {@link #setup}. */
    FirstSimulationStrategy firstSimulationStrategy;

    // Scratch lists, cleared and refilled on every call to mutate().
    // Index 0 is the germline sample, index 1 is the other sample.
    IntArrayList genotypeCounts0 = new IntArrayList();
    IntArrayList genotypeCounts1 = new IntArrayList();
    IntArrayList sortingPermutationGenotypeCounts0 = new IntArrayList();
    IntArrayList sortingPermutationGenotypeCounts1 = new IntArrayList();

    public OneSampleCanonicalSimulationStrategy() {

    }

    /**
     * Configures this strategy and its delegate.
     *
     * @param deltaSmall     forwarded unchanged to the delegate strategy
     * @param deltaBig       forwarded unchanged to the delegate strategy
     * @param zygHeuristic   forwarded unchanged to the delegate strategy
     * @param seed           seed for this strategy's random generator and for the delegate
     * @param canonThreshold fraction of the total germline count that the most frequent genotypes
     *                       must exceed; kept by this strategy (the delegate is set up with 0)
     */
    @Override
    public void setup(double deltaSmall, double deltaBig, double zygHeuristic, long seed, double canonThreshold) {
        firstSimulationStrategy = new FirstSimulationStrategy();
        firstSimulationStrategy.setup(deltaSmall, deltaBig, zygHeuristic, seed, 0);
        this.canonThreshold = canonThreshold;
        setSeed(seed);
    }

    /**
     * @return the number of samples this strategy works with, always 2
     */
    @Override
    public int numberOfSamplesSupported() {
        return 2;
    }

    /**
     * Delegates the mutation to the wrapped {@link FirstSimulationStrategy}, but cancels the
     * request to make the record somatic when the germline sample needs more than two genotypes
     * to exceed the canonical threshold.
     *
     * @param makeSomatic    whether the caller asks for a somatic variant to be planted
     * @param record         the record to mutate, forwarded to the delegate
     * @param germlineSample the sample whose counts decide whether the site is canonical
     * @param otherSample    the second sample; its counts are tallied but do not affect the decision
     * @param sim            not used by this strategy
     * @return whatever the delegate strategy returns
     */
    @Override
    public BaseInformationRecords.BaseInformation mutate(boolean makeSomatic,
                                                         BaseInformationRecords.BaseInformation record,
                                                         BaseInformationRecords.SampleInfo germlineSample,
                                                         BaseInformationRecords.SampleInfo otherSample,
                                                         SimulationCharacteristics sim) {

        int sumCount0 = getSumCounts(germlineSample, genotypeCounts0);
        // The other sample's tallies do not influence the decision below. They are still
        // refreshed so the package-visible scratch lists keep reflecting the last record seen.
        getSumCounts(otherSample, genotypeCounts1);
        prepareSorted(genotypeCounts0, sortingPermutationGenotypeCounts0);
        prepareSorted(genotypeCounts1, sortingPermutationGenotypeCounts1);
        int numAllelesGermline = sumGenotype90P(sumCount0, genotypeCounts0, sortingPermutationGenotypeCounts0);

        if (numAllelesGermline > 2) {

            // we won't make this somatic because the germline sample is not diploid.
            // by not making a somatic variant we will prevent the model learning that such sites are valid predictions.
            makeSomatic = false;
        }

        // NOTE(review): sim is not forwarded; null is passed to the delegate. Confirm this is intended.
        return firstSimulationStrategy.mutate(makeSomatic, record, germlineSample, otherSample, null);
    }


    /**
     * Re-seeds this strategy's random generator and, once {@link #setup} has created it,
     * the delegate strategy as well.
     *
     * @param seed the new seed
     */
    @Override
    public void setSeed(long seed) {
        randomGenerator = new XorShift1024StarRandom(seed);
        // Guard against setSeed() being called before setup(): the delegate does not exist yet,
        // and setup() will hand it a seed when it is created.
        if (firstSimulationStrategy != null) {
            firstSimulationStrategy.setSeed(seed);
        }
    }

    /**
     * Fills {@code permutation} with the indices of {@code original}, ordered so that the
     * corresponding counts are in decreasing order.
     *
     * @param original    the counts, in genotype order; not modified
     * @param permutation cleared, then filled with the sorted indices
     */
    private void prepareSorted(IntArrayList original, IntArrayList permutation) {

        permutation.clear();
        for (int i = 0; i < original.size(); i++) {
            permutation.add(i);
        }
        // Sort indices by decreasing count. Integer.compare avoids the overflow hazard of
        // comparing by subtraction.
        Collections.sort(permutation, (o1, o2) -> Integer.compare(original.getInt(o2), original.getInt(o1)));

    }

    /**
     * Calculate the number of genotypes with the most counts that together exceed
     * {@code canonThreshold} times the total count.
     * <p>
     * The comparison is strict, so if the threshold is never exceeded (for instance when every
     * count is zero) the total number of genotypes is returned.
     *
     * @param sumCount                         the sum of all counts in {@code genotypeCounts}
     * @param genotypeCounts                   the counts array, in genotype order
     * @param sortingPermutationGenotypeCounts the permutation array, from cumulative order to genotype order
     * @return the number of top-ranked genotypes accumulated before the threshold was exceeded,
     * or the number of genotypes when it never was
     */
    private int sumGenotype90P(int sumCount, IntArrayList genotypeCounts, IntArrayList sortingPermutationGenotypeCounts) {
        // Loop-invariant, so computed once.
        final double threshold = sumCount * canonThreshold;
        int cumulative = 0;
        int index = 0;
        for (int i = 0; i < sortingPermutationGenotypeCounts.size(); i++) {
            cumulative += getSortedCountAtIndex(i, genotypeCounts, sortingPermutationGenotypeCounts);
            index++;
            if (cumulative > threshold) {
                return index;
            }
        }
        return index;
    }

    /**
     * Returns the count at rank {@code i} (0 is the largest count) according to the permutation.
     *
     * @param i                                rank in decreasing-count order
     * @param genotypeCounts                   the counts array, in genotype order
     * @param sortingPermutationGenotypeCounts the permutation array, from rank to genotype index
     * @return the count of the genotype at rank {@code i}
     */
    private int getSortedCountAtIndex(int i, IntArrayList genotypeCounts, IntArrayList sortingPermutationGenotypeCounts) {
        return genotypeCounts.getInt(sortingPermutationGenotypeCounts.getInt(i));
    }

    /**
     * Collects, for each count entry of the sample, the forward-strand plus reverse-strand
     * genotype count.
     *
     * @param sample         the sample whose counts are read
     * @param genotypeCounts cleared, then filled with one combined count per entry, in list order
     * @return the sum of all combined counts
     */
    private int getSumCounts(BaseInformationRecords.SampleInfo sample, IntArrayList genotypeCounts) {
        genotypeCounts.clear();
        int sumCounts = 0;
        for (BaseInformationRecords.CountInfo c : sample.getCountsList()) {
            final int count = c.getGenotypeCountForwardStrand() + c.getGenotypeCountReverseStrand();
            genotypeCounts.add(count);
            sumCounts += count;
        }
        return sumCounts;
    }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy