All downloads are free. Search and download functionality uses the official Maven repository.

org.campagnelab.dl.somatic.mappers.DensityMapper Maven / Gradle / Ivy

There is a newer version: 1.4.0
Show newest version
package org.campagnelab.dl.somatic.mappers;

import org.campagnelab.dl.framework.mappers.FeatureNameMapper;
import org.campagnelab.dl.framework.mappers.NoMaskFeatureMapper;
import org.campagnelab.dl.varanalysis.protobuf.BaseInformationRecords;
import org.nd4j.linalg.api.ndarray.INDArray;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.function.Function;

/**
 * Produces feature that represent a density of values for a given number of bins..
 * Created by fac2003 on 10/21/16.
 */
public class DensityMapper extends NoMaskFeatureMapper
        implements FeatureNameMapper {

    protected Function> recordToValues;
    protected int minValue=Integer.MAX_VALUE;
    protected int maxValue=Integer.MIN_VALUE;
    protected float binWidth;
    protected String name;
    protected Function valueFunction;
    int numBins = 10;
    float[] bins;
    protected int[] indices;

    public DensityMapper(String name, int numBins, Properties sbiProperties,
                         Function> recordToValues
    ) {
        this(name, numBins, sbiProperties, recordToValues, Integer::floatValue);
    }

    public DensityMapper(String name1, String name2, int numBins, Properties sbiProperties,
                         Function> recordToValues
    ) {
        this(name1, name2, numBins, sbiProperties, recordToValues, Integer::floatValue);
    }


    /**
     * @param name
     * @param numBins number of bins to represent this density with. If -1, use a bin for every increment between minValue and maxValue.
     * @param sbiProperties
     * @param recordToValues
     * @param valueFunction
     */
    public DensityMapper(String name, int numBins, Properties sbiProperties,
                         Function> recordToValues,
                         Function valueFunction) {
        if (!propertiesPresent(sbiProperties, "stats." + name)) {
            throw new UnsupportedOperationException("The sbip file does not contain the statistics for " + name + " (stats." + name + ".min and stats." + name + ".max)");
        }
        this.minValue = (int)getMin(sbiProperties, "stats." + name);
        this.maxValue = (int)getMax(sbiProperties, "stats." + name);
        constructorHelper(name,numBins,recordToValues,valueFunction);
    }


    //handle case where there are two protobuf fields contributing to one map.
    public DensityMapper(String name1, String name2, int numBins, Properties sbiProperties,
                         Function> recordToValues,
                         Function valueFunction
    ) {

        if (!propertiesPresent(sbiProperties, "stats." + name1)) {
            throw new UnsupportedOperationException("The sbip file does not contain the statistics for " + name1 + " (stats." + name1 + ".min and stats." + name1 + ".max)");
        }
        if (!propertiesPresent(sbiProperties, "stats." + name2)) {
            throw new UnsupportedOperationException("The sbip file does not contain the statistics for " + name2 + " (stats." + name2 + ".min and stats." + name2 + ".max)");
        }
        //extend range to handle two fields
        this.minValue = Math.min((int)getMin(sbiProperties, "stats." + name1),(int)getMin(sbiProperties, "stats." + name2));
        this.maxValue = Math.max((int)getMax(sbiProperties, "stats." + name1),(int)getMax(sbiProperties, "stats." + name2));
        constructorHelper(name1 + "+" + name2,numBins,recordToValues,valueFunction);
    }


    protected void constructorHelper(String name, int numBins,
                                   Function> recordToValues,
                                   Function valueFunction){
        this.name = name;
        this.valueFunction = valueFunction;
        if (numBins == -1){
            this.numBins = (this.maxValue - this.minValue);
        } else {
            this.numBins = numBins;
        }
        bins = new float[this.numBins];
        this.recordToValues = recordToValues;
        this.binWidth = (valueFunction.apply(maxValue) - valueFunction.apply(minValue) )/ this.numBins;
    }



    @Override
    public int numberOfFeatures() {
        return numBins;
    }

    @Override
    public void prepareToNormalize(BaseInformationRecords.BaseInformationOrBuilder record, int indexOfRecord) {
        Arrays.fill(bins, 0);
        List listOfValues = recordToValues.apply(record);
        float numElements = 0;
        for (BaseInformationRecords.NumberWithFrequency n : listOfValues) {
            int featureIndex = (int) ((valueFunction.apply(n.getNumber()) - valueFunction.apply(minValue)) / binWidth);
            if (featureIndex < 0 || featureIndex >= numBins) {
                //ignore points outside of min-max
            } else {
                bins[featureIndex] += n.getFrequency();
                numElements += n.getFrequency();
            }
        }
        // normalize the counts to produce a density:
        if (numElements > 0) {
            for (int featureIndex = 0; featureIndex < numBins; featureIndex++) {
                bins[featureIndex] /= numElements;
            }
        }
    }

    @Override
    public void mapFeatures(BaseInformationRecords.BaseInformationOrBuilder record, INDArray inputs, int indexOfRecord) {
        mapFeatures(record, bins, 0, indexOfRecord);
        for (int featureIndex = 0; featureIndex < numberOfFeatures(); featureIndex++) {
            indices[1] = featureIndex;
            inputs.putScalar(indices, bins[featureIndex]);
        }
    }

    @Override
    public float produceFeature(BaseInformationRecords.BaseInformationOrBuilder record, int featureIndex) {
        return bins[featureIndex];
    }

    public void mapFeatures(BaseInformationRecords.BaseInformationOrBuilder record, float[] inputs, int offset, int indexOfRecord) {
        // do not copy if inputs is bins (call from mapFeatures above)
        if (inputs != bins) {
            System.arraycopy(bins, 0, inputs, offset, numBins);
        }
    }

    @Override
    public String getFeatureName(int featureIndex) {
        float binMin = 0;
        float binMax = 0;
        for (int i = 0; i < numBins; i++) {
            if (i < featureIndex) {
                binMin += binWidth;
            }
            if (i <= featureIndex) {
                binMax += binWidth;
            }
        }
        return String.format("density_%s_%s_%s", name, Float.toString(binMin),  Float.toString(binMax));
    }


    protected boolean propertiesPresent(Properties sbiProperties, String s) {
        final boolean minPresent = sbiProperties.containsKey(s + ".min");
        final boolean maxPresent = sbiProperties.containsKey(s + ".max");
        return (minPresent && maxPresent);
    }

    protected float getMin(Properties sbiProperties, String propertyName) {
        return Float.parseFloat(sbiProperties.getProperty(propertyName + ".min"));
    }

    protected float getMax(Properties sbiProperties, String propertyName) {
        return Float.parseFloat(sbiProperties.getProperty(propertyName + ".max"));
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy