All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.campagnelab.dl.somatic.mappers.FractionDifferences Maven / Gradle / Ivy

package org.campagnelab.dl.somatic.mappers;

import org.campagnelab.dl.framework.mappers.FeatureMapper;
import org.campagnelab.dl.somatic.genotypes.BaseGenotypeCountFactory;
import org.campagnelab.dl.somatic.genotypes.GenotypeCountFactory;
import org.campagnelab.dl.varanalysis.protobuf.BaseInformationRecords;
import org.nd4j.linalg.api.ndarray.INDArray;

/**
 * This is a fraction difference mapper, producing (germline proportion of total counts at this base) - (somatic proportion of total counts at this base).
 * Not finished (the neural network should be able to learn this linear combination on its own).
 * @author Remi Torracinta, rct66
 */

public class FractionDifferences extends AbstractFeatureMapper
        implements FeatureMapper {


    // Only implemented for records with exactly 2 samples: sample 0 is read as the
    // germline sample and sample 1 as the somatic sample.

    /** Divisor applied to every value returned by {@link #produceFeature}. */
    public static final int FRACTION_NORM = 1;
    /** Sum of forward + reverse counts of sample 0, set by {@link #prepareToNormalize}. */
    public int totalCountsGerm;
    /** Sum of forward + reverse counts of sample 1, set by {@link #prepareToNormalize}. */
    public int totalCountsSom;


    /**
     * Returns the number of features written per record.
     *
     * @return twice {@code MAX_GENOTYPES}: one slot per genotype for the positive part of
     * the difference, followed by one slot per genotype for the negative part.
     */
    public int numberOfFeatures() {
        return AbstractFeatureMapper.MAX_GENOTYPES * 2;
    }

    /**
     * Computes the per-sample total counts used as denominators by
     * {@link #produceFeatureInternal}. Must be called for a record before features are
     * produced for it, because the totals are kept in instance fields.
     *
     * NOTE(review): reads count entries 0..MAX_GENOTYPES-1 of samples 0 and 1 directly, so
     * it assumes both samples exist and carry at least that many entries - confirm upstream.
     *
     * @param record        record whose two samples are summed.
     * @param indexOfRecord unused by this mapper.
     */
    public void prepareToNormalize(BaseInformationRecords.BaseInformationOrBuilder record, int indexOfRecord) {
        int germTotal = 0;
        int somTotal = 0;
        for (int i = 0; i < AbstractFeatureMapper.MAX_GENOTYPES; i++) {
            BaseInformationRecords.CountInfo germline = record.getSamples(0).getCounts(i);
            BaseInformationRecords.CountInfo somatic = record.getSamples(1).getCounts(i);
            // Each total must only use counts of its own sample (forward + reverse strand);
            // mixing strands across samples would make the per-genotype fractions meaningless.
            germTotal += germline.getGenotypeCountForwardStrand() + germline.getGenotypeCountReverseStrand();
            somTotal += somatic.getGenotypeCountForwardStrand() + somatic.getGenotypeCountReverseStrand();
        }
        totalCountsGerm = germTotal;
        totalCountsSom = somTotal;
    }


    // Scratch {row, column} index reused across calls to mapFeatures; because it is shared
    // instance state, one mapper instance must not be used from several threads at once.
    int[] indices = new int[]{0, 0};

    /**
     * Writes all features of {@code record} into row {@code indexOfRecord} of {@code inputs}.
     *
     * @param record        record to map; {@link #prepareToNormalize} must have been called for it.
     * @param inputs        destination array, addressed as [indexOfRecord, featureIndex].
     * @param indexOfRecord row of {@code inputs} to fill.
     */
    public void mapFeatures(BaseInformationRecords.BaseInformationOrBuilder record, INDArray inputs, int indexOfRecord) {
        indices[0] = indexOfRecord;
        final int featureCount = numberOfFeatures();
        for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) {
            indices[1] = featureIndex;
            inputs.putScalar(indices, produceFeature(record, featureIndex));
        }
    }

    /**
     * Produces one normalized feature.
     *
     * @param record       record to read counts from.
     * @param featureIndex index in [0, numberOfFeatures()).
     * @return the internal feature value divided by {@link #FRACTION_NORM}.
     */
    public float produceFeature(BaseInformationRecords.BaseInformationOrBuilder record, int featureIndex) {
        return normalize(produceFeatureInternal(record, featureIndex), FRACTION_NORM);
    }


    /**
     * Divides {@code value} by {@code normalizationFactor}.
     *
     * @return 0 when the factor is 0 (avoids a division by zero), otherwise the quotient,
     * which is asserted to lie in [0, 1] when assertions are enabled.
     */
    private float normalize(float value, int normalizationFactor) {
        if (normalizationFactor == 0) {
            return 0;
        }
        float normalized = value / normalizationFactor;
        assert normalized >= 0 && normalized <= 1 : "value must be normalized: " + normalized;
        return normalized;
    }


    /**
     * Computes the un-normalized feature: the difference between the germline and somatic
     * fractions of counts for one genotype, clamped at 0.
     * Indices below {@code MAX_GENOTYPES} yield max(0, germline - somatic); indices at or
     * above it address the same genotypes and yield max(0, somatic - germline).
     *
     * @param record       record to read counts from.
     * @param featureIndex index in [0, numberOfFeatures()).
     * @return a non-negative fraction difference.
     */
    public float produceFeatureInternal(BaseInformationRecords.BaseInformationOrBuilder record, int featureIndex) {
        int direction = 1;
        if (featureIndex >= AbstractFeatureMapper.MAX_GENOTYPES) {
            featureIndex = featureIndex - AbstractFeatureMapper.MAX_GENOTYPES;
            direction = -1;
        }
        assert featureIndex >= 0 && featureIndex < AbstractFeatureMapper.MAX_GENOTYPES : "Only MAX_GENOTYPES features";
        // Look up each sample's entry once instead of calling getAllCounts for every field read.
        // NOTE(review): second argument presumably selects the tumor/somatic sample - confirm
        // against AbstractFeatureMapper.getAllCounts.
        GenotypeCount germline = getAllCounts(record, false, true).get(featureIndex);
        GenotypeCount somatic = getAllCounts(record, true, true).get(featureIndex);
        int germCounts = germline.forwardCount + germline.reverseCount;
        int somCounts = somatic.forwardCount + somatic.reverseCount;
        float difference = normalize(germCounts, totalCountsGerm) - normalize(somCounts, totalCountsSom);
        return Math.max(0, difference * direction);
    }

    /**
     * Supplies the factory used by the parent mapper to create plain {@code GenotypeCount}
     * instances.
     */
    @Override
    protected GenotypeCountFactory getGenotypeCountFactory() {
        return new BaseGenotypeCountFactory() {

            @Override
            public GenotypeCount create() {
                return new GenotypeCount();
            }
        };
    }


    /**
     * Intentionally empty: this mapper adds nothing to a count when it is initialized.
     */
    @Override
    protected void initializeCount(BaseInformationRecords.CountInfo sampleCounts, GenotypeCount count) {
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy