package org.opentripplanner.analyst;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import org.opentripplanner.analyst.core.WeightingFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.stream.IntStream;

/**
 * A pair of parallel histograms representing how many features are located at each amount of travel time
 * away from a single origin. One array contains the raw counts of features (e.g. the number of places of
 * employment M minutes from home) and the other contains the weighted sums of those features accounting for
 * their magnitudes (e.g. the number of jobs in all places of employment M minutes away from home).
 * All time values are rounded down into 1-minute bins (0-59 seconds = minute 0, 60-119 seconds = minute 1,
 * etc.), so for example a travel time of 125 seconds falls in minute bin 2.
 */
public class Histogram implements Serializable {
    private static final Logger LOG = LoggerFactory.getLogger(Histogram.class);

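    /**
     * Steepness of the default logistic rolloff. The weighting function is applied to second-binned
     * arrays, so -2/60.0 per second is equivalent to a slope of -2 per minute.
     */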
    public static final double LOGISTIC_STEEPNESS = -2 / 60.0;

    /**
     * The weighting function to be used, producing one cumulative accessibility value per minute
     * (typically 120 values, covering two hours). Any additive function can be used, and its output
     * is placed in the parallel counts and sums arrays.
     */
    public static WeightingFunction weightingFunction = new WeightingFunction.Logistic(LOGISTIC_STEEPNESS);
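    /*
     * Because the weighting function is a mutable public static field, an analysis can swap in a
     * different rolloff before any histograms are built. A minimal sketch using only the Logistic
     * variant already referenced above (the steepness value here is illustrative, not a recommended
     * setting):
     *
     *   Histogram.weightingFunction = new WeightingFunction.Logistic(-1 / 60.0);
     */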
    /**
     * The number of features that can be reached within each one-minute bin. Index 0 is 0-1 minutes,
     * index 50 is 50-51 minutes, etc. The features are not weighted by their magnitudes, so values
     * represent (for example) the number of places of employment that can be reached rather than the
     * total number of jobs in all those places of employment.
     */
    public int[] counts;
    /**
     * The weighted sum of all features that can be reached within each one-minute bin.
     * Index 0 is 0-1 minutes, index 50 is 50-51 minutes, etc.
     * Features are weighted by their magnitudes, so values represent (for example) the total number of
     * jobs in all accessible places of employment, rather than the number of places of employment.
     */
    public int[] sums;
    /**
     * Given parallel arrays of travel times and magnitudes for any number of destination features,
     * construct histograms that represent the distribution of individual features and total
     * opportunities as a function of travel time. The length of the resulting arrays is determined by
     * the weighting function applied to the per-second bins (typically one entry per minute).
     * @param times the time in seconds at which each destination is reached; Integer.MAX_VALUE marks
     *              destinations that were never reached. The input array is read but not modified.
     * @param weight the weight or magnitude of each destination reached. It is parallel to times.
     */
    public Histogram (int[] times, int[] weight) {
        // Optimization: bin times and weights by second of travel time. By the pigeonhole principle
        // there will often be more than one destination in a given second, since there are usually
        // far more destinations than there are seconds.
        int maxSecs = 0; // zero rather than MIN_VALUE so an all-unreachable input yields empty bins
        for (int time : times) {
            if (time == Integer.MAX_VALUE)
                continue;
            if (time > maxSecs)
                maxSecs = time;
        }
        int[] binnedCounts = new int[maxSecs + 1];
        int[] binnedWeights = new int[maxSecs + 1];
        for (int i = 0; i < times.length; i++) {
            if (times[i] == Integer.MAX_VALUE)
                continue;
            binnedCounts[times[i]] += 1;
            binnedWeights[times[i]] += weight[i];
        }
        counts = weightingFunction.apply(binnedCounts);
        sums = weightingFunction.apply(binnedWeights);
    }
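    /*
     * A minimal usage sketch with hypothetical values: times are seconds of travel to each destination
     * (Integer.MAX_VALUE = unreached) and weights are opportunity magnitudes, e.g. jobs per workplace.
     *
     *   int[] times = { 300, 1500, 2700, Integer.MAX_VALUE };
     *   int[] jobs  = {  40,  120,   10,                 55 };
     *   Histogram h = new Histogram(times, jobs);
     *   // h.counts[i] and h.sums[i] then hold the feature count and the weighted opportunity total
     *   // for minute bin i, as produced by the configured weighting function.
     */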
    /** No-arg constructor for serialization/deserialization. */
    public Histogram () {}
    /** Build one histogram per property of the given PointSet, sharing a single counts array across all of them. */
    public static Map<String, Histogram> buildAll (int[] times, PointSet targets) {
        try {
            // Find the largest finite travel time; bins hold one entry per second up to that time.
            int size = IntStream.of(times).reduce(0, (memo, i) -> i != Integer.MAX_VALUE ? Math.max(i, memo) : memo) + 1;
            int[] binnedCounts = new int[size];
            // Use an identity hash map so that lookups are speedy. This is safe because the map is
            // only used within this function, keyed on the exact String instances from targets.properties.
            Map<String, int[]> binnedProperties = new IdentityHashMap<>();
            for (String key : targets.properties.keySet()) {
                binnedProperties.put(key, new int[size]);
            }
            for (int fidx = 0; fidx < times.length; fidx++) {
                if (times[fidx] == Integer.MAX_VALUE)
                    continue;
                binnedCounts[times[fidx]] += 1;
                for (Map.Entry<String, int[]> prop : targets.properties.entrySet()) {
                    binnedProperties.get(prop.getKey())[times[fidx]] += prop.getValue()[fidx];
                }
            }
            // Make the histograms. The counts array is identical for all histograms;
            // only the weighted sums differ from one property to the next.
            int[] counts = weightingFunction.apply(binnedCounts);
            Map<String, Histogram> ret = new HashMap<>();
            for (Map.Entry<String, int[]> e : binnedProperties.entrySet()) {
                Histogram h = new Histogram();
                h.counts = counts;
                h.sums = weightingFunction.apply(e.getValue());
                ret.put(e.getKey(), h);
            }
            return ret;
        } catch (Exception e) {
            LOG.error("Error constructing histograms", e);
            return null;
        }
    }
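    /*
     * Usage sketch (hypothetical names): given one travel time per feature of a PointSet "workplaces"
     * whose properties include a "jobs" magnitude column, build all histograms at once and pick out
     * the one for jobs:
     *
     *   Map<String, Histogram> histograms = Histogram.buildAll(travelTimes, workplaces);
     *   Histogram jobsHistogram = histograms.get("jobs");
     */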
    /**
     * Serialize this pair of histograms out as a JSON document using the given JsonGenerator. The format is:
     * {
     *   sums: [],
     *   counts: []
     * }
     */
    public void writeJson(JsonGenerator jgen) throws JsonGenerationException, IOException {
        // The total number of opportunities reached during each minute (the sum of the features' magnitudes)
        jgen.writeArrayFieldStart("sums"); {
            for (int sum : sums) {
                jgen.writeNumber(sum);
            }
        }
        jgen.writeEndArray();
        // The number of features reached during each minute, ignoring their magnitudes
        jgen.writeArrayFieldStart("counts"); {
            for (int count : counts) {
                jgen.writeNumber(count);
            }
        }
        jgen.writeEndArray();
    }
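    /*
     * Usage sketch: writeJson emits two array fields, so the caller is responsible for opening and
     * closing the enclosing JSON object. Using only the Jackson core API:
     *
     *   JsonFactory factory = new JsonFactory();
     *   try (JsonGenerator jgen = factory.createGenerator(outputStream)) {
     *       jgen.writeStartObject();
     *       histogram.writeJson(jgen);
     *       jgen.writeEndObject();
     *   }
     */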
}