package org.opentripplanner.analyst;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import org.opentripplanner.analyst.core.WeightingFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.stream.IntStream;

/**
 * A pair of parallel histograms representing how many features are located at each amount of travel time
 * away from a single origin. One array contains the raw counts of features (e.g. the number of places of
 * employment M minutes from home) and the other contains the weighted sums of those features accounting for
 * their magnitudes (e.g. the number of jobs in all places of employment M minutes away from home).
 * All time values are rounded down into 1-minute bins (0-59 seconds = minute 0, 60-119 seconds = minute 1,
 * etc.), so for example a travel time of 125 seconds falls in minute bin 2.
 */
public class Histogram implements Serializable {
    private static final Logger LOG = LoggerFactory.getLogger(Histogram.class);

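    /**
     * Steepness of the default logistic rolloff. The weighting function is applied to second-binned
     * arrays, so -2/60.0 per second is equivalent to a slope of -2 per minute.
     */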
    public static final double LOGISTIC_STEEPNESS = -2 / 60.0;

    /**
     * The weighting function to be used, producing one cumulative accessibility value per minute
     * (typically 120 values, covering two hours). Any additive function can be used, and its output
     * is placed in the parallel counts and sums arrays.
     */
    public static WeightingFunction weightingFunction = new WeightingFunction.Logistic(LOGISTIC_STEEPNESS);
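    /*
     * Because the weighting function is a mutable public static field, an analysis can swap in a
     * different rolloff before any histograms are built. A minimal sketch using only the Logistic
     * variant already referenced above (the steepness value here is illustrative, not a recommended
     * setting):
     *
     *   Histogram.weightingFunction = new WeightingFunction.Logistic(-1 / 60.0);
     */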
    /**
     * The number of features that can be reached within each one-minute bin. Index 0 is 0-1 minutes,
     * index 50 is 50-51 minutes, etc. The features are not weighted by their magnitudes, so values
     * represent (for example) the number of places of employment that can be reached rather than the
     * total number of jobs in all those places of employment.
     */
    public int[] counts;
    /**
     * The weighted sum of all features that can be reached within each one-minute bin.
     * Index 0 is 0-1 minutes, index 50 is 50-51 minutes, etc.
     * Features are weighted by their magnitudes, so values represent (for example) the total number of
     * jobs in all accessible places of employment, rather than the number of places of employment.
     */
    public int[] sums;
    /**
     * Given parallel arrays of travel times and magnitudes for any number of destination features,
     * construct histograms that represent the distribution of individual features and total
     * opportunities as a function of travel time. The length of the resulting arrays is determined by
     * the weighting function applied to the per-second bins (typically one entry per minute).
     * @param times the time in seconds at which each destination is reached; Integer.MAX_VALUE marks
     *              destinations that were never reached. The input array is read but not modified.
     * @param weight the weight or magnitude of each destination reached. It is parallel to times.
     */
    public Histogram (int[] times, int[] weight) {
        // Optimization: bin times and weights by second of travel time. By the pigeonhole principle
        // there will often be more than one destination in a given second, since there are usually
        // far more destinations than there are seconds.
        int maxSecs = 0; // zero rather than MIN_VALUE so an all-unreachable input yields empty bins
        for (int time : times) {
            if (time == Integer.MAX_VALUE)
                continue;
            if (time > maxSecs)
                maxSecs = time;
        }
        int[] binnedCounts = new int[maxSecs + 1];
        int[] binnedWeights = new int[maxSecs + 1];
        for (int i = 0; i < times.length; i++) {
            if (times[i] == Integer.MAX_VALUE)
                continue;
            binnedCounts[times[i]] += 1;
            binnedWeights[times[i]] += weight[i];
        }
        counts = weightingFunction.apply(binnedCounts);
        sums = weightingFunction.apply(binnedWeights);
    }
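    /*
     * A minimal usage sketch with hypothetical values: times are seconds of travel to each destination
     * (Integer.MAX_VALUE = unreached) and weights are opportunity magnitudes, e.g. jobs per workplace.
     *
     *   int[] times = { 300, 1500, 2700, Integer.MAX_VALUE };
     *   int[] jobs  = {  40,  120,   10,                 55 };
     *   Histogram h = new Histogram(times, jobs);
     *   // h.counts[i] and h.sums[i] then hold the feature count and the weighted opportunity total
     *   // for minute bin i, as produced by the configured weighting function.
     */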
    /** No-arg constructor for serialization/deserialization. */
    public Histogram () {}
    /** Build one histogram per property of the given PointSet, sharing a single counts array across all of them. */
    public static Map<String, Histogram> buildAll (int[] times, PointSet targets) {
        try {
            // Find the largest finite travel time; bins hold one entry per second up to that time.
            int size = IntStream.of(times).reduce(0, (memo, i) -> i != Integer.MAX_VALUE ? Math.max(i, memo) : memo) + 1;
            int[] binnedCounts = new int[size];
            // Use an identity hash map so that lookups are speedy. This is safe because the map is
            // only used within this function, keyed on the exact String instances from targets.properties.
            Map<String, int[]> binnedProperties = new IdentityHashMap<>();
            for (String key : targets.properties.keySet()) {
                binnedProperties.put(key, new int[size]);
            }
            for (int fidx = 0; fidx < times.length; fidx++) {
                if (times[fidx] == Integer.MAX_VALUE)
                    continue;
                binnedCounts[times[fidx]] += 1;
                for (Map.Entry<String, int[]> prop : targets.properties.entrySet()) {
                    binnedProperties.get(prop.getKey())[times[fidx]] += prop.getValue()[fidx];
                }
            }
            // Make the histograms. The counts array is identical for all histograms;
            // only the weighted sums differ from one property to the next.
            int[] counts = weightingFunction.apply(binnedCounts);
            Map<String, Histogram> ret = new HashMap<>();
            for (Map.Entry<String, int[]> e : binnedProperties.entrySet()) {
                Histogram h = new Histogram();
                h.counts = counts;
                h.sums = weightingFunction.apply(e.getValue());
                ret.put(e.getKey(), h);
            }
            return ret;
        } catch (Exception e) {
            LOG.error("Error constructing histograms", e);
            return null;
        }
    }
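    /*
     * Usage sketch (hypothetical names): given one travel time per feature of a PointSet "workplaces"
     * whose properties include a "jobs" magnitude column, build all histograms at once and pick out
     * the one for jobs:
     *
     *   Map<String, Histogram> histograms = Histogram.buildAll(travelTimes, workplaces);
     *   Histogram jobsHistogram = histograms.get("jobs");
     */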
    /**
     * Serialize this pair of histograms out as a JSON document using the given JsonGenerator. The format is:
     * {
     *   sums: [],
     *   counts: []
     * }
     */
    public void writeJson(JsonGenerator jgen) throws JsonGenerationException, IOException {
        // The total number of opportunities reached during each minute (the sum of the features' magnitudes)
        jgen.writeArrayFieldStart("sums"); {
            for (int sum : sums) {
                jgen.writeNumber(sum);
            }
        }
        jgen.writeEndArray();
        // The number of features reached during each minute, ignoring their magnitudes
        jgen.writeArrayFieldStart("counts"); {
            for (int count : counts) {
                jgen.writeNumber(count);
            }
        }
        jgen.writeEndArray();
    }
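    /*
     * Usage sketch: writeJson emits two array fields, so the caller is responsible for opening and
     * closing the enclosing JSON object. Using only the Jackson core API:
     *
     *   JsonFactory factory = new JsonFactory();
     *   try (JsonGenerator jgen = factory.createGenerator(outputStream)) {
     *       jgen.writeStartObject();
     *       histogram.writeJson(jgen);
     *       jgen.writeEndObject();
     *   }
     */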
}