// Source listing of org.heigit.bigspatialdata.oshdb.api.mapreducer.MapReducerAggregations
// (artifact: oshdb-api).
package org.heigit.bigspatialdata.oshdb.api.mapreducer;
import org.heigit.bigspatialdata.oshdb.api.generic.WeightedValue;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableBiFunction;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableBinaryOperator;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableFunction;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableSupplier;
/**
* Interface defining the common aggregation methods found on MapReducer or MapAggregator objects.
*
*
* Depending on whether a plain MapReducer or a MapAggregator object the result will either be a
* direct value (e.g. an Integer when counting) or a (sorted) Map of the respective values
* associated with the appropriate index values.
*
*
* @param type the respective MapReducer or MapAggregator currently operates on
*/
interface MapReducerAggregations {
/**
* Generic Map-reduce routine.
*
*
* This can be used to perform an arbitrary reduce routine
*
*
*
* The combination of the used types and identity/reducer functions must make "mathematical"
* sense:
*
*
* - the accumulator and combiner functions need to be associative,
* - values generated by the identitySupplier factory must be an identity for the combiner
* function: `combiner(identitySupplier(),x)` must be equal to `x`,
* - the combiner function must be compatible with the accumulator function:
* `combiner(u, accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`
*
*
*
* Functionally, this interface is similar to Java11 Stream's
* reduce(identity,accumulator,combiner)
* interface.
*
*
* @param identitySupplier a factory function that returns a new starting value to reduce results
* into (e.g. when summing values, one needs to start at zero)
* @param accumulator a function that takes a result from the `mapper` function (type <R>)
* and an accumulation value (type <S>, e.g. the result of `identitySupplier()`) and
* returns the "sum" of the two; contrary to `combiner`, this function is allowed to alter
* (mutate) the state of the accumulation value (e.g. directly adding new values to an
* existing Set object)
* @param combiner a function that calculates the "sum" of two <S> values; this function
* must be pure (have no side effects), and is not allowed to alter the state of the two
* input objects it gets!
* @param the data type used to contain the "reduced" (intermediate and final) results
* @return the result of the map-reduce operation, the final result of the last call to the
* `combiner` function, after all `mapper` results have been aggregated (in the
* `accumulator` and `combiner` steps)
*/
Object reduce(
SerializableSupplier identitySupplier,
SerializableBiFunction accumulator,
SerializableBinaryOperator combiner
) throws Exception;
/**
* Generic map-reduce routine (shorthand syntax).
*
*
* This variant is shorter to program than `reduce(identitySupplier, accumulator, combiner)`, but
* can only be used if the result type is the same as the current `map`ped type <X>. Also
* this variant can be less efficient since it cannot benefit from the mutability freedoms the
* accumulator+combiner approach has.
*
*
*
* The combination of the used types and identity/reducer functions must make "mathematical"
* sense:
*
*
* - the accumulator function needs to be associative,
* - values generated by the identitySupplier factory must be an identity for the accumulator
* function: `accumulator(identitySupplier(),x)` must be equal to `x`,
*
*
*
* Functionally, this interface is similar to Java11 Stream's
* reduce(identity,accumulator)
* interface.
*
*
* @param identitySupplier a factory function that returns a new starting value to reduce results
* into (e.g. when summing values, one needs to start at zero)
* @param accumulator a function that takes a result from the `mapper` function (type <X>)
* and an accumulation value (also of type <X>, e.g. the result of
* `identitySupplier()`) and returns the "sum" of the two; contrary to `combiner`, this
* function is not to alter (mutate) the state of the accumulation value (e.g. directly
* adding new values to an existing Set object)
* @return the result of the map-reduce operation, the final result of the last call to the
* `combiner` function, after all `mapper` results have been aggregated (in the
* `accumulator` and `combiner` steps)
*/
Object reduce(
SerializableSupplier identitySupplier,
SerializableBinaryOperator accumulator
) throws Exception;
/**
* Sums up the results.
*
*
* The current data values need to be numeric (castable to "Number" type), otherwise a runtime
* exception will be thrown.
*
*
* @return the sum of the current data
* @throws UnsupportedOperationException if the data cannot be cast to numbers
*/
Object sum() throws Exception;
/**
* Sums up the results provided by a given `mapper` function.
*
*
* This is a shorthand for `.map(mapper).sum()`, with the difference that here the numerical
* return type of the `mapper` is ensured.
*
*
* @param mapper function that returns the numbers to sum up
* @param the numeric type that is returned by the `mapper` function
* @return the summed up results of the `mapper` function
*/
Object sum(SerializableFunction mapper) throws Exception;
/**
* Counts the number of results.
*
* @return the total count of features or modifications, summed up over all timestamps
*/
Object count() throws Exception;
/**
* Gets all unique values of the results.
*
*
* For example, this can be used together with the OSMContributionView to get the total amount of
* unique users editing specific feature types.
*
*
* @return the set of distinct values
*/
Object uniq() throws Exception;
/**
* Gets all unique values of the results provided by a given mapper function.
*
* This is a shorthand for `.map(mapper).uniq()`.
*
* @param mapper function that returns some values
* @param the type that is returned by the `mapper` function
* @return a set of distinct values returned by the `mapper` function
*/
Object uniq(SerializableFunction mapper) throws Exception;
/**
* Counts all unique values of the results.
*
*
* For example, this can be used together with the OSMContributionView to get the number of unique
* users editing specific feature types.
*
*
* @return the set of distinct values
*/
Object countUniq() throws Exception;
/**
* Calculates the averages of the results.
*
*
* The current data values need to be numeric (castable to "Number" type), otherwise a runtime
* exception will be thrown.
*
*
* @return the average of the current data
* @throws UnsupportedOperationException if the data cannot be cast to numbers
*/
Object average() throws Exception;
/**
* Calculates the average of the results provided by a given `mapper` function.
*
* @param mapper function that returns the numbers to average
* @param the numeric type that is returned by the `mapper` function
* @return the average of the numbers returned by the `mapper` function
*/
Object average(SerializableFunction mapper) throws Exception;
/**
* Calculates the weighted average of the results provided by the `mapper` function.
*
*
* The mapper must return an object of the type `WeightedValue` which contains a numeric value
* associated with a (floating point) weight.
*
*
* @param mapper function that gets called for each entity snapshot or modification, needs to
* return the value and weight combination of numbers to average
* @return the weighted average of the numbers returned by the `mapper` function
*/
Object weightedAverage(SerializableFunction mapper) throws Exception;
/**
* Returns an estimate of the median of the results.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @return estimated median
*/
Object estimatedMedian() throws Exception;
/**
* Returns an estimate of the median of the results after applying the given map function.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param mapper function that returns the numbers to generate the mean for
* @return estimated median
*/
Object estimatedMedian(SerializableFunction mapper) throws Exception;
/**
* Returns an estimate of a requested quantile of the results.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param q the desired quantile to calculate (as a number between 0 and 1)
* @return estimated quantile boundary
*/
Object estimatedQuantile(double q) throws Exception;
/**
* Returns an estimate of a requested quantile of the results after applying the given map
* function.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param mapper function that returns the numbers to generate the quantile for
* @param q the desired quantile to calculate (as a number between 0 and 1)
* @return estimated quantile boundary
*/
Object estimatedQuantile(SerializableFunction mapper, double q)
throws Exception;
/**
* Returns an estimate of the quantiles of the results.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
* @return estimated quantile boundaries
*/
Object estimatedQuantiles(Iterable q) throws Exception;
/**
* Returns an estimate of the quantiles of the results after applying the given map function.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param mapper function that returns the numbers to generate the quantiles for
* @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
* @return estimated quantile boundaries
*/
Object estimatedQuantiles(
SerializableFunction mapper,
Iterable q
) throws Exception;
/**
* Returns a function that computes estimates of arbitrary quantiles of the results.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @return a function that computes estimated quantile boundaries
*/
Object estimatedQuantiles() throws Exception;
/**
* Returns a function that computes estimates of arbitrary quantiles of the results after applying
* the given map function.
*
*
* Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
* https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
*
*
* @param mapper function that returns the numbers to generate the quantiles for
* @return a function that computes estimated quantile boundaries
*/
Object estimatedQuantiles(SerializableFunction mapper) throws Exception;
/**
* Collects all results into List(s).
*
* @return list(s) with all results returned by the `mapper` function
*/
Object collect() throws Exception;
/**
* Returns all results as a Stream.
*
* @return a stream with all results returned by the `mapper` function
*/
Object stream() throws Exception;
}