org.heigit.ohsome.oshdb.api.mapreducer.MapReducer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of oshdb-api Show documentation
API to query the OpenStreetMap History Database. Includes MapReduce functionality to filter, analyze and aggregate data.
The newest version!
package org.heigit.ohsome.oshdb.api.mapreducer;

import static org.heigit.ohsome.oshdb.OSHDBBoundingBox.bboxWgs84Coordinates;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.Streams;
import com.tdunning.math.stats.TDigest;
import java.io.IOException;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
import java.util.function.DoubleUnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.heigit.ohsome.oshdb.OSHDB;
import org.heigit.ohsome.oshdb.OSHDBBoundingBox;
import org.heigit.ohsome.oshdb.OSHDBTag;
import org.heigit.ohsome.oshdb.OSHDBTimestamp;
import org.heigit.ohsome.oshdb.api.db.OSHDBDatabase;
import org.heigit.ohsome.oshdb.api.generic.NumberUtils;
import org.heigit.ohsome.oshdb.api.generic.WeightedValue;
import org.heigit.ohsome.oshdb.filter.AndOperator;
import org.heigit.ohsome.oshdb.filter.Filter;
import org.heigit.ohsome.oshdb.filter.FilterExpression;
import org.heigit.ohsome.oshdb.filter.FilterParser;
import org.heigit.ohsome.oshdb.filter.GeometryTypeFilter;
import org.heigit.ohsome.oshdb.filter.TagFilterEquals;
import org.heigit.ohsome.oshdb.filter.TagFilterEqualsAny;
import org.heigit.ohsome.oshdb.filter.TypeFilter;
import org.heigit.ohsome.oshdb.index.XYGridTree;
import org.heigit.ohsome.oshdb.index.XYGridTree.CellIdRange;
import org.heigit.ohsome.oshdb.osh.OSHEntity;
import org.heigit.ohsome.oshdb.osm.OSMEntity;
import org.heigit.ohsome.oshdb.osm.OSMType;
import org.heigit.ohsome.oshdb.util.OSHDBTagKey;
import org.heigit.ohsome.oshdb.util.exceptions.OSHDBInvalidTimestampException;
import org.heigit.ohsome.oshdb.util.exceptions.OSHDBNotImplementedException;
import org.heigit.ohsome.oshdb.util.function.OSHEntityFilter;
import org.heigit.ohsome.oshdb.util.function.OSMEntityFilter;
import org.heigit.ohsome.oshdb.util.function.SerializableBiFunction;
import org.heigit.ohsome.oshdb.util.function.SerializableBinaryOperator;
import org.heigit.ohsome.oshdb.util.function.SerializableConsumer;
import org.heigit.ohsome.oshdb.util.function.SerializableFunction;
import org.heigit.ohsome.oshdb.util.function.SerializablePredicate;
import org.heigit.ohsome.oshdb.util.function.SerializableSupplier;
import org.heigit.ohsome.oshdb.util.geometry.Geo;
import org.heigit.ohsome.oshdb.util.geometry.OSHDBGeometryBuilder;
import org.heigit.ohsome.oshdb.util.mappable.OSHDBMapReducible;
import org.heigit.ohsome.oshdb.util.mappable.OSMContribution;
import org.heigit.ohsome.oshdb.util.mappable.OSMEntitySnapshot;
import org.heigit.ohsome.oshdb.util.taginterpreter.DefaultTagInterpreter;
import org.heigit.ohsome.oshdb.util.taginterpreter.TagInterpreter;
import org.heigit.ohsome.oshdb.util.time.IsoDateTimeParser;
import org.heigit.ohsome.oshdb.util.time.OSHDBTimestampList;
import org.heigit.ohsome.oshdb.util.time.OSHDBTimestamps;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.json.simple.parser.ParseException;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.Polygonal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Main class of oshdb's "functional programming" API.
 *
 * It accepts a list of filters and transformation `map` functions, and produces a result when
 * calling the `reduce` method (or one of its shorthand versions like `sum`, `count`, etc.).
 *
 * <p>
 * You can set a list of filters that are applied on the raw OSM data, for example you can filter:
 * </p>
 * <ul>
 * <li>geometrically by an area of interest (bbox or polygon)</li>
 * <li>by osm tags (key only or key/value)</li>
 * <li>by OSM type</li>
 * <li>custom filter callback</li>
 * </ul>
 *
 * Depending on the used data "view", the MapReducer produces either "snapshots" or all evaluated
 * modifications ("contributions") of the matching raw OSM data.
 *
 * These data can then be transformed arbitrarily by user defined `map` functions (which take one
 * of these entity snapshots or modifications as input and produce an arbitrary output) or
 * `flatMap` functions (which can return an arbitrary number of results per entity
 * snapshot/contribution). It is possible to chain together any number of transformation functions.
 *
 * Finally, one can either use one of the pre-defined result-generating functions (e.g. `sum`,
 * `count`, `average`, `uniq`), or specify a custom `reduce` procedure.
 *
 * If one wants to get results that are aggregated by timestamp (or some other index), one can
 * use the `aggregateByTimestamp` or `aggregateBy` functionality that automatically handles the
 * grouping of the output data.
 *
 * For more complex analyses, it is also possible to enable the grouping of the input data by
 * the respective OSM ID. This can be used to look at the whole history of entities at once.
 *
 * @param <X> the type that is returned by the currently set mapper functions. The next added
 *        mapper function will be called with a parameter of this type as input
 */
public abstract class MapReducer implements
    MapReducerSettings>, Mappable, MapReducerAggregations,
    MapAggregatable, X>, X>, Serializable {

  // Class-wide logger.
  private static final Logger LOG = LoggerFactory.getLogger(MapReducer.class);
  // Message templates for filters that cannot match any data. The `{}` placeholders look like
  // logger-style placeholders -- presumably used with LOG by code outside this section; confirm.
  protected static final String TAG_KEY_NOT_FOUND =
      "Tag key {} not found. No data will match this filter.";
  protected static final String TAG_NOT_FOUND =
      "Tag {}={} not found. No data will match this filter.";
  protected static final String EMPTY_TAG_LIST =
      "Empty tag value list. No data will match this filter.";
  // Message templates with `%s` placeholders, filled in via String.format (see
  // aggregateByGeometry for a use of UNIMPLEMENTED_DATA_VIEW).
  protected static final String UNIMPLEMENTED_DATA_VIEW = "Unimplemented data view: %s";
  protected static final String UNSUPPORTED_GROUPING = "Unsupported grouping: %s";

  // Database handle supplied at construction time; used e.g. to obtain the tag translator for
  // textual filters. Declared transient, so it is not part of the serialized form of this object.
  protected transient OSHDBDatabase oshdb;

  // NOTE(review): presumably the query timeout (unit not visible in this section); null appears
  // to mean "no timeout set" -- confirm against the backend implementations.
  protected Long timeout = null;

  /** the class representing the used OSHDB view: either {@link OSMContribution} or
   * {@link OSMEntitySnapshot}. */
  Class viewClass;

  /** The ways in which the input data of a query can be grouped before it is transformed. */
  enum Grouping {
    /** No grouping: every snapshot / contribution is processed on its own. */
    NONE,
    /** Input data is grouped by entity id (activated through {@code groupByEntity()}). */
    BY_ID
  }

  // Currently active grouping mode; starts as NONE and is switched to BY_ID by groupByEntity().
  Grouping grouping = Grouping.NONE;

  /**
   * Tells whether a query running on the current backend can be canceled (e.g. when a query
   * timeout is hit).
   *
   * @return always {@code false} in this base implementation
   */
  public boolean isCancelable() {
    return false;
  }

  // utility objects
  // Custom tag interpreter set through tagInterpreter(TagInterpreter); null until one is supplied.
  private TagInterpreter tagInterpreter = null;

  // settings and filters
  // Timestamps of the analysis. Default: monthly steps starting at 2008-01-01 and ending at
  // whatever currentDate() returns.
  protected OSHDBTimestampList tstamps = new OSHDBTimestamps(
      "2008-01-01",
      currentDate(),
      OSHDBTimestamps.Interval.MONTHLY
      );
  // Bounding box of the area of interest. Default: the full WGS84 coordinate range.
  protected OSHDBBoundingBox bboxFilter = bboxWgs84Coordinates(-180.0, -90.0, 180.0, 90.0);
  // Polygonal area of interest; stays null as long as only bounding boxes have been set.
  private Geometry polyFilter = null;
  // OSM types to include. Default: nodes, ways and relations.
  protected EnumSet typeFilter = EnumSet.of(OSMType.NODE, OSMType.WAY, OSMType.RELATION);
  // NOTE(review): the element types of the two lists below appear to have lost their generic
  // type arguments (stray `>`); restore them from the upstream source.
  // Predicates evaluated on OSH entities (see osmTag: `oshEntity.hasTagKey(...)`).
  private final List> preFilters = new ArrayList<>();
  // Predicates evaluated on OSM entities (see osmTag: `osmEntity.getTags()...`).
  private final List> filters = new ArrayList<>();
  // Chain of transformations (MapFunction / FilterFunction instances) in the order they were added.
  final LinkedList mappers = new LinkedList<>();

  /**
   * Basic constructor: creates a MapReducer with default settings for the given view.
   *
   * @param oshdb the database object to store in this MapReducer
   * @param viewClass the class representing the used OSHDB view
   */
  protected MapReducer(OSHDBDatabase oshdb, Class viewClass) {
    this.viewClass = viewClass;
    this.oshdb = oshdb;
  }

  /**
   * Copy constructor: takes over the database handle, view, grouping, tag interpreter and all
   * settings, filters and mappers of the given object.
   *
   * The type filter is cloned and the filter / mapper lists are copied element by element, so
   * later changes to these collections on the copy do not affect {@code obj}. The remaining
   * fields are shared by reference. The {@code timeout} field is not copied here and keeps its
   * initial value.
   *
   * @param obj the MapReducer to copy the state from
   */
  protected MapReducer(MapReducer obj) {
    // references that are simply shared with the source object
    this.oshdb = obj.oshdb;
    this.viewClass = obj.viewClass;
    this.grouping = obj.grouping;
    this.tagInterpreter = obj.tagInterpreter;
    this.tstamps = obj.tstamps;
    this.bboxFilter = obj.bboxFilter;
    this.polyFilter = obj.polyFilter;

    // collections that get their own independent copy
    this.typeFilter = obj.typeFilter.clone();
    this.preFilters.addAll(obj.preFilters);
    this.filters.addAll(obj.filters);
    this.mappers.addAll(obj.mappers);
  }

  /**
   * Creates a copy of this MapReducer.
   *
   * All "setting", filtering and mapping methods of this class call this method first and then
   * apply their change to the returned object, leaving the object they were called on untouched.
   *
   * @return a copy of this object, never {@code null}
   */
  @NotNull
  protected abstract MapReducer copy();

  // -----------------------------------------------------------------------------------------------
  // "Setting" methods and associated internal helpers
  // -----------------------------------------------------------------------------------------------

  /**
   * Sets the tagInterpreter to use in the analysis.
   *
   * The tagInterpreter is used internally to determine the geometry type of osm entities (e.g. an
   * osm way can become either a LineString or a Polygon, depending on its tags). Normally, it is
   * generated automatically for the user. If one does not want to use the DefaultTagInterpreter,
   * this method can be used to supply a custom one.
   *
   * @param tagInterpreter the tagInterpreter object to use in the processing of osm entities
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @SuppressWarnings("unused")
  @Contract(pure = true)
  public MapReducer tagInterpreter(TagInterpreter tagInterpreter) {
    MapReducer modified = this.copy();
    modified.tagInterpreter = tagInterpreter;
    return modified;
  }

  // -----------------------------------------------------------------------------------------------
  // Filtering methods
  // -----------------------------------------------------------------------------------------------

  /**
   * Restricts the area of interest to the given bounding box. Only objects inside or clipped by
   * this bbox will be passed on to the analysis' `mapper` function.
   *
   * If no polygonal area of interest is set, the current bounding box is intersected with the
   * given one. Otherwise the current polygon is clipped to the given bounding box and the bounding
   * box of the query is recomputed from the clipped polygon.
   *
   * @param bboxFilter the bounding box to query the data in
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Override
  @Contract(pure = true)
  public MapReducer areaOfInterest(@NotNull OSHDBBoundingBox bboxFilter) {
    MapReducer restricted = this.copy();
    if (this.polyFilter != null) {
      // a polygon is already active: clip it and derive the new bbox from the result
      restricted.polyFilter = Geo.clip(restricted.polyFilter, bboxFilter);
      restricted.bboxFilter =
          OSHDBGeometryBuilder.boundingBoxOf(restricted.polyFilter.getEnvelopeInternal());
      return restricted;
    }
    // only a bbox is active: narrow it down
    restricted.bboxFilter = restricted.bboxFilter.intersection(bboxFilter);
    return restricted;
  }

  /**
   * Restricts the area of interest to the given polygon. Only objects inside or clipped by this
   * polygon will be passed on to the analysis' `mapper` function.
   *
   * The given polygon is clipped to the area of interest that is already set: to the current
   * bounding box if no polygon has been set yet, or to the current polygon otherwise. The bounding
   * box of the query is then recomputed from the resulting polygon.
   *
   * @param polygonFilter the polygon to query the data in
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Override
  @Contract(pure = true)
  public  MapReducer areaOfInterest(@NotNull P polygonFilter) {
    MapReducer restricted = this.copy();
    if (this.polyFilter != null) {
      restricted.polyFilter = Geo.clip(polygonFilter, restricted.getPolyFilter());
    } else {
      restricted.polyFilter = Geo.clip(polygonFilter, restricted.bboxFilter);
    }
    restricted.bboxFilter =
        OSHDBGeometryBuilder.boundingBoxOf(restricted.polyFilter.getEnvelopeInternal());
    return restricted;
  }

  /**
   * Set the timestamps for which to perform the analysis.
   *
   * <p>
   * Depending on the *View*, this has slightly different semantics:
   * </p>
   * <ul>
   * <li>For the OSMEntitySnapshotView it will set the time slices at which to take the
   * "snapshots"</li>
   * <li>For the OSMContributionView it will set the time interval in which to look for
   * osm contributions (only the first and last timestamp of this list are contributing).</li>
   * </ul>
   * Additionally, these timestamps are used in the `aggregateByTimestamp` functionality.
   *
   * @param tstamps an object (implementing the OSHDBTimestampList interface) which provides the
   *        timestamps to do the analysis for
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  public MapReducer timestamps(OSHDBTimestampList tstamps) {
    MapReducer modified = this.copy();
    modified.tstamps = tstamps;
    return modified;
  }

  /**
   * Set the timestamps for which to perform the analysis in a regular interval between a start and
   * end date.
   *
   * See {@link #timestamps(OSHDBTimestampList)} for further information.
   *
   * Supplied times are assumed to be in UTC (and the only allowed timezone designator is 'Z').
   * If a date parameter does not include a time part, midnight (00:00:00Z) of the respective
   * date is used.
   *
   * @param isoDateStart an ISO 8601 date string representing the start date of the analysis
   * @param isoDateEnd an ISO 8601 date string representing the end date of the analysis
   * @param interval the interval between the timestamps to be used in the analysis
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  public MapReducer timestamps(
      String isoDateStart, String isoDateEnd, OSHDBTimestamps.Interval interval
  ) {
    OSHDBTimestamps regularTimestamps = new OSHDBTimestamps(isoDateStart, isoDateEnd, interval);
    return this.timestamps(regularTimestamps);
  }

  /**
   * Sets a single timestamp for which to perform the analysis at.
   *
   * Useful in combination with the OSMEntitySnapshotView when not performing further aggregation
   * by timestamp. When used on an OSMContributionView query, a warning is logged because that
   * view requires two or more timestamps.
   *
   * See {@link #timestamps(OSHDBTimestampList)} for further information.
   *
   * Supplied times are assumed to be in UTC (and the only allowed timezone designator is 'Z').
   * If a date parameter does not include a time part, midnight (00:00:00Z) of the respective
   * date is used.
   *
   * @param isoDate an ISO 8601 date string representing the date of the analysis
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  public MapReducer timestamps(String isoDate) {
    if (isOSMContributionViewQuery()) {
      LOG.warn("OSMContributionView requires two or more timestamps, but only one was supplied.");
    }
    // the same date is passed twice; the multi-timestamp variant de-duplicates it
    final String[] noFurtherDates = {};
    return this.timestamps(isoDate, isoDate, noFurtherDates);
  }

  /**
   * Sets two timestamps (start and end date) for which to perform the analysis.
   *
   * Useful in combination with the OSMContributionView when not performing further aggregation
   * by timestamp.
   *
   * See {@link #timestamps(OSHDBTimestampList)} for further information.
   *
   * Supplied times are assumed to be in UTC (and the only allowed timezone designator is 'Z').
   * If a date parameter does not include a time part, midnight (00:00:00Z) of the respective
   * date is used.
   *
   * @param isoDateStart an ISO 8601 date string representing the start date of the analysis
   * @param isoDateEnd an ISO 8601 date string representing the end date of the analysis
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  public MapReducer timestamps(String isoDateStart, String isoDateEnd) {
    // delegate to the multi-timestamp variant without any additional dates
    final String[] noFurtherDates = {};
    return this.timestamps(isoDateStart, isoDateEnd, noFurtherDates);
  }

  /**
   * Sets multiple arbitrary timestamps for which to perform the analysis.
   *
   * Note for programmers wanting to use this method to supply an arbitrary number (n>=1) of
   * timestamps: You may supply the same time string multiple times, which will be de-duplicated
   * internally (the parsed timestamps are collected in a sorted set). E.g. you can call the
   * method like this:
   * {@code .timestamps(dateArr[0], dateArr[0], dateArr)}
   *
   * See {@link #timestamps(OSHDBTimestampList)} for further information.
   *
   * Supplied times are assumed to be in UTC (and the only allowed timezone designator is 'Z').
   * If a date parameter does not include a time part, midnight (00:00:00Z) of the respective
   * date is used.
   *
   * @param isoDateFirst an ISO 8601 date string representing the start date of the analysis
   * @param isoDateSecond an ISO 8601 date string representing the second date of the analysis
   * @param isoDateMore more ISO 8601 date strings representing the remaining timestamps of the
   *        analysis
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  public MapReducer timestamps(
      String isoDateFirst, String isoDateSecond, String... isoDateMore) {
    // dates are parsed in argument order; the sorted set orders and de-duplicates them
    SortedSet parsedTimestamps = new TreeSet<>();
    parsedTimestamps.add(toOshdbTimestamp(isoDateFirst));
    parsedTimestamps.add(toOshdbTimestamp(isoDateSecond));
    for (String furtherIsoDate : isoDateMore) {
      parsedTimestamps.add(toOshdbTimestamp(furtherIsoDate));
    }
    return this.timestamps(() -> parsedTimestamps);
  }

  /** Parses an ISO 8601 date string and wraps its epoch second value into an OSHDBTimestamp. */
  private static OSHDBTimestamp toOshdbTimestamp(String isoDate) {
    long epochSecond = IsoDateTimeParser.parseIsoDateTime(isoDate).toEpochSecond();
    return new OSHDBTimestamp(epochSecond);
  }

  /**
   * Narrows the OSM type filter down to the types that are both currently allowed and contained
   * in the given set.
   *
   * @param typeFilter the set of OSM types to keep
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  private MapReducer osmTypeInternal(Set typeFilter) {
    MapReducer narrowed = this.copy();
    Set remainingTypes = Sets.intersection(narrowed.typeFilter, typeFilter);
    // EnumSet.copyOf cannot handle an empty non-EnumSet collection, hence the special case
    narrowed.typeFilter = remainingTypes.isEmpty()
        ? EnumSet.noneOf(OSMType.class)
        : EnumSet.copyOf(remainingTypes);
    return narrowed;
  }

  /**
   * Adds a filter for the given tag (key and value).
   *
   * Two checks are registered: a pre-filter that tests whether an OSH entity has the tag's key,
   * and a filter that tests whether an OSM entity carries the exact tag.
   *
   * @param tag the tag to filter by
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  private MapReducer osmTag(OSHDBTag tag) {
    MapReducer filtered = this.copy();
    filtered.preFilters.add(osh -> osh.hasTagKey(tag.getKey()));
    filtered.filters.add(osm -> osm.getTags().hasTag(tag));
    return filtered;
  }

  /**
   * Adds a filter for the given tag key (any value).
   *
   * Two checks are registered: a pre-filter that tests whether an OSH entity has the key, and a
   * filter that tests whether the tags of an OSM entity contain the key.
   *
   * @param tagKey the tag key to filter by
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Contract(pure = true)
  private MapReducer osmTag(OSHDBTagKey tagKey) {
    MapReducer filtered = this.copy();
    filtered.preFilters.add(osh -> osh.hasTagKey(tagKey));
    filtered.filters.add(osm -> osm.getTags().hasTagKey(tagKey));
    return filtered;
  }

  // -----------------------------------------------------------------------------------------------
  // "map", "flatMap" transformation methods
  // -----------------------------------------------------------------------------------------------

  /**
   * Set an arbitrary `map` transformation function.
   *
   * Delegates to the two-argument variant of `map`, ignoring its second ("root") argument.
   *
   * @param mapper function that will be applied to each data entry (osm entity snapshot or
   *        contribution)
   * @param <R> an arbitrary data type which is the return type of the transformation `map`
   *        function
   * @return a modified copy of this MapReducer object operating on the transformed type (<R>)
   */
  @Override
  @Contract(pure = true)
  public  MapReducer map(SerializableFunction mapper) {
    return map((input, unusedRoot) -> mapper.apply(input));
  }

  // Some internal methods can also map the "root" object of the mapreducer's view.
  // The mapper receives the current value as first and the root object (the snapshot or
  // contribution the value was derived from) as second argument. It is appended to the end of
  // the mapper chain of a copy of this object; the `false` flag marks it as a plain (non-flat) map.
  @Contract(pure = true)
  protected   MapReducer map(SerializableBiFunction mapper) {
    MapReducer ret = this.copy();
    ret.mappers.add(new MapFunction(mapper, false));
    @SuppressWarnings("unchecked") // after applying this mapper, we have a mapreducer of type R
    MapReducer result = (MapReducer) ret;
    return result;
  }

  // NOTE(review): the generic type arguments of this signature appear to be missing (note the
  // stray `>`); restore them from the upstream source.
  /**
   * Set an arbitrary `flatMap` transformation function, which returns list with an arbitrary number
   * of results per input data entry. The results of this function will be "flattened", meaning that
   * they can be for example transformed again by setting additional `map` functions.
   *
   * Delegates to the two-argument variant of `flatMap`, ignoring its second ("root") argument.
   *
   * @param flatMapper function that will be applied to each data entry (osm entity snapshot or
   *        contribution) and returns a list of results
   * @param <R> an arbitrary data type which is the element type of the results returned by the
   *        transformation `flatMap` function
   * @return a modified copy of this MapReducer object operating on the transformed type (<R>)
   */
  @Override
  @Contract(pure = true)
  public  MapReducer flatMap(SerializableFunction> flatMapper) {
    return flatMap((o, ignored) -> flatMapper.apply(o));
  }

  // Some internal methods can also flatMap the "root" object of the mapreducer's view.
  // The flatMapper receives the current value as first and the root object as second argument.
  // It is appended to the end of the mapper chain of a copy of this object; the `true` flag marks
  // it as a flat-mapping function.
  // NOTE(review): the generic type arguments of this signature appear to be missing (note the
  // stray `>`); restore them from the upstream source.
  @Contract(pure = true)
  protected   MapReducer flatMap(SerializableBiFunction> flatMapper) {
    MapReducer ret = this.copy();
    ret.mappers.add(new MapFunction(flatMapper, true));
    @SuppressWarnings("unchecked") // after applying this mapper, we have a mapreducer of type R
    MapReducer result = (MapReducer) ret;
    return result;
  }

  /**
   * Adds a custom arbitrary filter that gets executed in the current transformation chain.
   *
   * The filter is appended to the chain of mappers, i.e. it is evaluated on the values produced
   * by the transformation functions that were set before it.
   *
   * @param f the filter function that determines if the respective data should be passed on (when f
   *        returns true) or discarded (when f returns false)
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Override
  @Contract(pure = true)
  public MapReducer filter(SerializablePredicate f) {
    MapReducer filtered = this.copy();
    filtered.mappers.add(new FilterFunction(f));
    return filtered;
  }

  /**
   * Apply a custom filter expression to this query.
   *
   * The expression is registered in three places: as a pre-filter on OSH entities, as a filter on
   * OSM entities, and as a filter on the snapshots / contributions themselves. The latter is
   * inserted before all transformation functions that were already set, so it always sees the
   * untransformed snapshot / contribution objects (or lists of them when `groupByEntity()` is
   * active, in which case lists that end up empty are dropped).
   *
   * See the oshdb-filter readme and {@link org.heigit.ohsome.oshdb.filter} for further
   * information about how to create such a filter expression object.
   *
   * @param f the {@link org.heigit.ohsome.oshdb.filter.FilterExpression} to apply
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   * @throws UnsupportedOperationException if the active grouping mode is neither "none" nor
   *         "by entity id"
   */
  @Override
  @Contract(pure = true)
  public MapReducer filter(FilterExpression f) {
    MapReducer ret = this.copy();
    ret.preFilters.add(f::applyOSH);
    ret.filters.add(f::applyOSM);
    // apply geometry filter as first map function
    // -> set the already present mappers aside; they are appended again further below
    final List remainingMappers = List.copyOf(ret.mappers);
    ret.mappers.clear();
    if (this.grouping == Grouping.NONE) {
      // no grouping -> directly filter using the geometries of the snapshot / contribution
      // (for any other view class no entry-level filter is added here)
      if (isOSMEntitySnapshotViewQuery()) {
        ret = ret.filter(x -> {
          OSMEntitySnapshot s = (OSMEntitySnapshot) x;
          return f.applyOSMEntitySnapshot(s);
        });
      } else if (isOSMContributionViewQuery()) {
        ret = ret.filter(x -> {
          OSMContribution c = (OSMContribution) x;
          return f.applyOSMContribution(c);
        });
      }
    } else if (this.grouping == Grouping.BY_ID) {
      // grouping by entity -> filter each list entry individually
      // and discard entities for which no entry is left afterwards
      if (isOSMEntitySnapshotViewQuery()) {
        @SuppressWarnings("unchecked") MapReducer filteredListMapper = (MapReducer)
            ret.map(x -> (Collection) x)
                .map(snapshots -> snapshots.stream()
                    .filter(f::applyOSMEntitySnapshot)
                    .collect(Collectors.toCollection(ArrayList::new)))
                .filter(snapshots -> !snapshots.isEmpty());
        ret = filteredListMapper;
      } else if (isOSMContributionViewQuery()) {
        @SuppressWarnings("unchecked") MapReducer filteredListMapper = (MapReducer)
            ret.map(x -> (Collection) x)
                .map(contributions -> contributions.stream()
                    .filter(f::applyOSMContribution)
                    .collect(Collectors.toCollection(ArrayList::new)))
                .filter(contributions -> !contributions.isEmpty());
        ret = filteredListMapper;
      }
    } else {
      throw new UnsupportedOperationException(
          "filtering not implemented in grouping mode " + this.grouping.toString());
    }
    // re-attach the previously set transformation functions after the new filter step(s)
    ret.mappers.addAll(remainingMappers);
    // NOTE(review): optimizeFilters is defined outside this section; presumably it derives
    // cheaper settings (e.g. type / tag filters) from the expression -- confirm.
    return optimizeFilters(ret, f);
  }

  /**
   * Apply a textual filter to this query.
   *
   * The filter string is parsed into a filter expression with a parser that uses the tag
   * translator of this query's database, and the result is applied like any other filter
   * expression. See the oshdb-filter readme for a description of the filter syntax.
   *
   * @param f the filter string to apply
   * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
   */
  @Override
  @Contract(pure = true)
  public MapReducer filter(String f) {
    FilterParser parser = new FilterParser(oshdb.getTagTranslator());
    FilterExpression parsedFilter = parser.parse(f);
    return this.filter(parsedFilter);
  }

  // -----------------------------------------------------------------------------------------------
  // Grouping and Aggregation
  // Sets how the input data is "grouped", or the output data is "aggregated" into separate chunks.
  // -----------------------------------------------------------------------------------------------

  // NOTE(review): the generic type arguments of this signature appear to be missing (note the
  // stray `>`); restore them from the upstream source.
  /**
   * Groups the input data (osm entity snapshot or contributions) by their respective entity's ids
   * before feeding them into further transformation functions. This can be used to do more complex
   * analysis on the osm data, that requires one to know about the full editing history of
   * individual osm entities, e.g., when looking for contributions which got reverted at a later
   * point in time.
   *
   * The values in the returned lists of snapshot or contribution objects are returned in their
   * natural order: i.e. sorted ascending by timestamp.
   *
   * This can be called before or after `map` or `flatMap` transformation functions have been
   * set. Functions that were already set are "rewound" and re-applied to the elements of each
   * grouped list: the result of a `map` function replaces the respective list element, the
   * results of a `flatMap` function are concatenated into the list.
   *
   * @return the MapReducer object which applies its transformations on (by entity id grouped) lists
   *         of the input data
   * @throws UnsupportedOperationException if this is called when a grouping has already been
   *         activated
   */
  @Contract(pure = true)
  public MapReducer> groupByEntity() throws UnsupportedOperationException {
    if (this.grouping != Grouping.NONE) {
      throw new UnsupportedOperationException("A grouping is already active on this MapReducer");
    }
    if (!this.mappers.isEmpty()) {
      // for convenience, we allow one to set this function even after some map functions were set.
      // if some map / flatMap functions were already set:
      // "rewind" them first, apply the grouping and then re-apply the map/flatMap functions
      // accordingly
      MapReducer ret = this.copy();
      List mapFunctions = new ArrayList<>(ret.mappers);
      ret.mappers.clear();
      ret.grouping = Grouping.BY_ID;
      @SuppressWarnings("unchecked")
      // now in the reduce step the backend will return a list of items
      MapReducer> listMapReducer = (MapReducer>) ret;
      for (MapFunction action : mapFunctions) {
        if (action.isFlatMapper()) {
          // flatMap: apply to every list element and concatenate all results into one list
          listMapReducer = listMapReducer.map((list, root) -> list.stream()
              .flatMap(s -> Streams.stream((Iterable) action.apply(s, root)))
              .collect(Collectors.toList()));
        } else {
          // map: apply to every list element (Lists.transform returns a lazily evaluated view)
          @SuppressWarnings("StaticPseudoFunctionalStyleMethod")
          MapReducer> mappedResult = listMapReducer.map((list, root) ->
              Lists.transform(list, x -> action.apply(x, root)));
          listMapReducer = mappedResult;
        }
      }
      @SuppressWarnings("unchecked") // now in the reduce step the backend will return a list of X
      MapReducer> result = listMapReducer.map(List.class::cast);
      return result;
    } else {
      // no transformation set yet: only the grouping mode needs to be switched
      MapReducer ret = this.copy();
      ret.grouping = Grouping.BY_ID;
      @SuppressWarnings("unchecked") // now in the reduce step the backend will return a list of X
      MapReducer> result = (MapReducer>) ret;
      return result;
    }
  }

  // NOTE(review): the generic type parameter declaration of this signature appears to be
  // truncated (it starts with `& Serializable>`); restore it from the upstream source.
  /**
   * Sets a custom aggregation function that is used to group output results into.
   *
   * The indexer is only called with the current (possibly already transformed) value; the root
   * object of the view is not passed on to it.
   *
   * @param indexer a function that will be called for each input element and returns a value that
   *        will be used to group the results by
   * @param  the data type of the values used to aggregate the output. has to be a comparable
   *        type
   * @param zerofill a collection of values that are expected to be present in the result
   * @return a MapAggregator object with the equivalent state (settings, filters, map function,
   *         etc.) of the current MapReducer object
   */
  @Contract(pure = true)
  public  & Serializable> MapAggregator aggregateBy(
      SerializableFunction indexer,
      Collection zerofill
  ) {
    return new MapAggregator<>(this, (data, ignored) -> indexer.apply(data), zerofill);
  }

  // NOTE(review): the generic type parameter declaration of this signature appears to be
  // truncated (it starts with `& Serializable>`); restore it from the upstream source.
  /**
   * Sets a custom aggregation function that is used to group output results into.
   *
   * Same as the two-argument variant of `aggregateBy`, called with an empty zerofill collection.
   *
   * @param indexer a function that will be called for each input element and returns a value that
   *        will be used to group the results by
   * @param  the data type of the values used to aggregate the output. has to be a comparable
   *        type
   * @return a MapAggregator object with the equivalent state (settings, filters, map function,
   *         etc.) of the current MapReducer object
   */
  @Override
  @Contract(pure = true)
  public  & Serializable> MapAggregator aggregateBy(
      SerializableFunction indexer
  ) {
    return this.aggregateBy(indexer, Collections.emptyList());
  }

  /**
   * Sets up automatic aggregation by timestamp.
   *
   * In the OSMEntitySnapshotView, the snapshots' timestamp will be used directly to aggregate
   * results into. In the OSMContributionView, the timestamps of the respective data modifications
   * will be matched to corresponding time intervals (that are defined by the `timestamps` setting
   * here): each contribution is assigned to the latest of the set timestamps that is not after
   * the contribution's own timestamp.
   *
   * Cannot be used together with the `groupByEntity()` setting enabled.
   *
   * @return a MapAggregator object with the equivalent state (settings, filters, map function,
   *         etc.) of the current MapReducer object
   * @throws UnsupportedOperationException if this is called when the `groupByEntity()` mode has
   *         been activated, or if the view is neither an OSMContributionView nor an
   *         OSMEntitySnapshotView
   */
  @Contract(pure = true)
  public MapAggregator aggregateByTimestamp()
      throws UnsupportedOperationException {
    if (this.grouping != Grouping.NONE) {
      throw new UnsupportedOperationException(
          "automatic aggregateByTimestamp() cannot be used together with the groupByEntity() "
              + "functionality -> try using aggregateByTimestamp(customTimestampIndex) instead");
    }

    // by timestamp indexing function -> for some views we need to match the input data to the list
    // (the indexers work on the root object, so they are unaffected by previously set mappers)
    SerializableBiFunction indexer;
    if (isOSMContributionViewQuery()) {
      final TreeSet timestamps = new TreeSet<>(this.tstamps.get());
      // floor: greatest timestamp of the list that is less than or equal to the contribution's
      indexer = (ignored, root) -> timestamps.floor(((OSMContribution) root).getTimestamp());
    } else if (isOSMEntitySnapshotViewQuery()) {
      indexer = (ignored, root) -> ((OSMEntitySnapshot) root).getTimestamp();
    } else {
      throw new UnsupportedOperationException(
          "automatic aggregateByTimestamp() only implemented for OSMContribution and "
              + "OSMEntitySnapshot -> try using aggregateByTimestamp(customTimestampIndex) instead"
      );
    }

    return new MapAggregator<>(this, indexer, this.getZerofillTimestamps());
  }

  /**
   * Sets up aggregation by a custom time index.
   *
   * The timestamps returned by the supplied indexing function are matched to the corresponding
   * time intervals: each value is assigned to the latest of the set timestamps that is not after
   * the timestamp returned for it.
   *
   * @param indexer a callback function that return a timestamp object for each given data. Note
   *                that if this function returns {@code null} or a timestamp before the first or
   *                after the last of the supplied timestamps(), an
   *                {@link OSHDBInvalidTimestampException} is thrown when the value is indexed
   * @return a MapAggregator object with the equivalent state (settings,
   *         filters, map function, etc.) of the current MapReducer object
   */
  public MapAggregator aggregateByTimestamp(
      SerializableFunction indexer
  ) throws UnsupportedOperationException {
    final TreeSet timestamps = new TreeSet<>(this.tstamps.get());
    // NOTE(review): TreeSet.first() / last() throw NoSuchElementException on an empty set, so an
    // empty timestamp list fails here -- confirm whether that case can occur.
    final OSHDBTimestamp minTime = timestamps.first();
    final OSHDBTimestamp maxTime = timestamps.last();
    return new MapAggregator<>(this, (data, ignored) -> {
      // match timestamps to the given timestamp list
      OSHDBTimestamp aggregationTimestamp = indexer.apply(data);
      if (aggregationTimestamp == null
          || aggregationTimestamp.compareTo(minTime) < 0
          || aggregationTimestamp.compareTo(maxTime) > 0) {
        throw new OSHDBInvalidTimestampException(
            "Aggregation timestamp outside of time query interval.");
      }
      // floor: greatest timestamp of the list that is less than or equal to the given one
      return timestamps.floor(aggregationTimestamp);
    }, getZerofillTimestamps());
  }

  /**
   * Sets up automatic aggregation by geometries.
   *
   * <p>Each root object of the query (contribution or entity snapshot) is split along the given
   * geometries, the resulting parts are grouped by the key of the geometry they belong to, and
   * the mapping functions that were set so far are then applied to the parts.</p>
   *
   * <p>Cannot be used together with the `groupByEntity()` setting enabled.</p>
   *
   * @param geometries an associated list of polygons and identifiers
   * @param  the type of the identifiers used to aggregate
   * @param <P> a polygonal geometry type
   * @return a MapAggregator object with the equivalent state (settings, filters, map function,
   *         etc.) of the current MapReducer object
   * @throws UnsupportedOperationException if this is called when the `groupByEntity()` mode has
   *         been activated
   * @throws UnsupportedOperationException when called after any map or flatMap functions are set
   */
  @Contract(pure = true)
  public  & Serializable, P extends Geometry & Polygonal>
      MapAggregator aggregateByGeometry(Map geometries)
      throws UnsupportedOperationException {
    if (this.grouping != Grouping.NONE) {
      throw new UnsupportedOperationException(
          "aggregateByGeometry() cannot be used together with the groupByEntity() functionality");
    }

    // splitter built from the supplied geometries; used below to split every root object
    GeometrySplitter gs = new GeometrySplitter<>(geometries);

    // Capture the mapping functions set so far as one flat-map function: when all mappers pass
    // canUseFastPath, the optional single result is wrapped into a list of zero or one elements,
    // otherwise the regular flat mapper is used.
    var prevMapper = this.getMapper();
    SerializableFunction> prevFlatMapper =
        this.mappers.stream().allMatch(this::canUseFastPath)
            ? root -> (Iterable) prevMapper.apply(root)
                .map(Collections::singletonList)
                .orElse(Collections.emptyList())
            : this.getFlatMapper();
    // emit one (geometry key, split part) entry per part, computed from the root object
    MapReducer> mapRed;
    if (isOSMContributionViewQuery()) {
      mapRed = this.flatMap((ignored, root) ->
          gs.splitOSMContribution((OSMContribution) root).entrySet());
    } else if (isOSMEntitySnapshotViewQuery()) {
      mapRed = this.flatMap((ignored, root) ->
          gs.splitOSMEntitySnapshot((OSMEntitySnapshot) root).entrySet());
    } else {
      throw new UnsupportedOperationException(String.format(
          UNIMPLEMENTED_DATA_VIEW, this.viewClass));
    }
    // group by the entry key, unwrap the entry value and re-apply the captured mappers to it
    MapAggregator mapAgg = mapRed
        .aggregateBy(Entry::getKey, geometries.keySet())
        .map(Entry::getValue)
        .flatMap(prevFlatMapper::apply);
    @SuppressWarnings("unchecked") // no mapper functions have been applied so the type is still X
    MapAggregator result = (MapAggregator) mapAgg;
    return result;
  }

  // -----------------------------------------------------------------------------------------------
  // Exposed generic reduce.
  // Can be used by experienced users of the api to implement complex queries.
  // These offer full flexibility, but are potentially a bit tricky to work with (see javadoc).
  // -----------------------------------------------------------------------------------------------

  /**
   * Generic map-reduce routine.
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   *
   * <ul>
   * <li>the accumulator and combiner functions need to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the combiner
   * function: `combiner(identitySupplier(),x)` must be equal to `x`,</li>
   * <li>the combiner function must be compatible with the accumulator function: `combiner(u,
   * accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * reduce(identity,accumulator,combiner)
   * interface.
   * </p>
   *
   * <p>
   * The backend routine that gets called depends on the grouping mode, on the data view of the
   * query (contributions or entity snapshots) and on whether all mapping functions pass the
   * `canUseFastPath` check.
   * </p>
   *
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type &lt;R&gt;)
   *        and an accumulation value (type &lt;S&gt;, e.g. the result of `identitySupplier()`) and
   *        returns the "sum" of the two; contrary to `combiner`, this function is allowed to alter
   *        (mutate) the state of the accumulation value (e.g. directly adding new values to an
   *        existing Set object)
   * @param combiner a function that calculates the "sum" of two &lt;S&gt; values; this function
   *        must be pure (have no side effects), and is not allowed to alter the state of the two
   *        input objects it gets!
   * @param <S> the data type used to contain the "reduced" (intermediate and final) results
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `combiner` function, after all `mapper` results have been aggregated (in the
   *         `accumulator` and `combiner` steps)
   * @throws UnsupportedOperationException if the used oshdb database backend doesn't implement
   *         the required reduce operation.
   * @throws Exception if during the reducing operation an exception happens (see the respective
   *         implementations for details).
   */
  @Override
  @Contract(pure = true)
  public  S reduce(
      SerializableSupplier identitySupplier,
      SerializableBiFunction accumulator,
      SerializableBinaryOperator combiner)
      throws Exception {
    checkTimeout();
    switch (this.grouping) {
      case NONE:
        if (this.mappers.stream().allMatch(this::canUseFastPath)) {
          // fast path: hand the single-result mapper directly to the per-item backend routine
          final SerializableFunction> mapper = this.getMapper();
          if (isOSMContributionViewQuery()) {
            @SuppressWarnings("Convert2MethodRef")
            // having just `mapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
            final SerializableFunction> contributionMapper =
                data -> mapper.apply(data);
            return this.mapReduceCellsOSMContribution(
                contributionMapper,
                identitySupplier,
                accumulator,
                combiner
            );
          } else if (isOSMEntitySnapshotViewQuery()) {
            @SuppressWarnings("Convert2MethodRef")
            // having just `mapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
            final SerializableFunction> snapshotMapper =
                data -> mapper.apply(data);
            return this.mapReduceCellsOSMEntitySnapshot(
                snapshotMapper,
                identitySupplier,
                accumulator,
                combiner
            );
          } else {
            throw new UnsupportedOperationException(String.format(
                UNIMPLEMENTED_DATA_VIEW, this.viewClass));
          }
        } else {
          // no fast path: use the grouped-by-id backend routine, applying the flat mapper to
          // every item of the input list and concatenating all results into one output list
          final SerializableFunction> flatMapper = this.getFlatMapper();
          if (isOSMContributionViewQuery()) {
            return this.flatMapReduceCellsOSMContributionGroupedById(
                (List inputList) -> {
                  List outputList = new LinkedList<>();
                  inputList.stream()
                      .map((SerializableFunction>) flatMapper::apply)
                      .forEach(data -> Iterables.addAll(outputList, data));
                  return outputList;
                }, identitySupplier, accumulator, combiner);
          } else if (isOSMEntitySnapshotViewQuery()) {
            return this.flatMapReduceCellsOSMEntitySnapshotGroupedById(
                (List inputList) -> {
                  List outputList = new LinkedList<>();
                  inputList.stream()
                      .map((SerializableFunction>) flatMapper::apply)
                      .forEach(data -> Iterables.addAll(outputList, data));
                  return outputList;
                }, identitySupplier, accumulator, combiner);
          } else {
            throw new UnsupportedOperationException(String.format(
                UNIMPLEMENTED_DATA_VIEW, this.viewClass));
          }
        }
        // (no fall-through: every branch above either returns or throws)
      case BY_ID:
        // the grouped-by-id backend routines expect a flat mapper; on the fast path the optional
        // single result is wrapped into a list holding zero or one elements
        final SerializableFunction> flatMapper;
        if (this.mappers.stream().allMatch(this::canUseFastPath)) {
          final SerializableFunction> mapper = this.getMapper();
          flatMapper = data -> mapper.apply(data)
              .map(Collections::singletonList)
              .orElse(Collections.emptyList());
        } else {
          flatMapper = this.getFlatMapper();
        }
        if (isOSMContributionViewQuery()) {
          @SuppressWarnings("Convert2MethodRef")
          // having just `flatMapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
          final SerializableFunction, Iterable> contributionFlatMapper =
              data -> flatMapper.apply(data);
          return this.flatMapReduceCellsOSMContributionGroupedById(
              contributionFlatMapper,
              identitySupplier,
              accumulator,
              combiner
          );
        } else if (isOSMEntitySnapshotViewQuery()) {
          @SuppressWarnings("Convert2MethodRef")
          // having just `flatMapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
          final SerializableFunction, Iterable> snapshotFlatMapper =
              data -> flatMapper.apply(data);
          return this.flatMapReduceCellsOSMEntitySnapshotGroupedById(
              snapshotFlatMapper,
              identitySupplier,
              accumulator,
              combiner
          );
        } else {
          throw new UnsupportedOperationException(String.format(
              UNIMPLEMENTED_DATA_VIEW, this.viewClass));
        }
      default:
        throw new UnsupportedOperationException(String.format(
            UNSUPPORTED_GROUPING, this.grouping));
    }
  }

  /**
   * Generic map-reduce routine (shorthand syntax).
   *
   * <p>
   * This variant is shorter to program than `reduce(identitySupplier, accumulator, combiner)`, but
   * can only be used if the result type is the same as the current `map`ped type &lt;X&gt;. Also
   * this variant can be less efficient since it cannot benefit from the mutability freedoms the
   * accumulator+combiner approach has.
   * </p>
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   * <ul>
   * <li>the accumulator function needs to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the accumulator
   * function: `accumulator(identitySupplier(),x)` must be equal to `x`,</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * reduce(identity,accumulator)
   * interface.
   * </p>
   *
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type &lt;X&gt;)
   *        and an accumulation value (also of type &lt;X&gt;, e.g. the result of
   *        `identitySupplier()`) and returns the "sum" of the two; because this function is also
   *        passed on as the `combiner`, it must not alter (mutate) the state of the accumulation
   *        value (e.g. by directly adding new values to an existing Set object)
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `accumulator` function, after all `mapper` results have been aggregated
   */
  @Override
  @Contract(pure = true)
  public X reduce(SerializableSupplier identitySupplier,
      SerializableBinaryOperator accumulator) throws Exception {
    // the same function serves as both the accumulator and the combiner of the generic reduce
    return this.reduce(identitySupplier, accumulator::apply, accumulator);
  }

  // -----------------------------------------------------------------------------------------------
  // "Quality of life" helper methods to use the map-reduce functionality more directly and easily
  // for typical queries.
  // Available are: sum, count, uniq, countUniq, average, weightedAverage and the estimated
  // median/quantile(s) methods.
  // Each one can be used to get results aggregated by timestamp, aggregated by a custom index and
  // not aggregated totals.
  // -----------------------------------------------------------------------------------------------

  /**
   * Adds up all values of the current result set.
   *
   * <p>The current data values need to be numeric (castable to "Number" type), otherwise a
   * runtime exception will be thrown.</p>
   *
   * @return the sum of the current data
   * @throws UnsupportedOperationException if the data cannot be cast to numbers
   */
  @Override
  @Contract(pure = true)
  public Number sum() throws Exception {
    var numbers = this.makeNumeric();
    // start at zero and add up pairwise
    return numbers.reduce(() -> 0, NumberUtils::add);
  }

  /**
   * Adds up the numbers which the given `mapper` function produces for the results.
   *
   * <p>This is a shorthand for `.map(mapper).sum()`, with the difference that here the numerical
   * return type of the `mapper` is ensured.</p>
   *
   * @param mapper function that returns the numbers to sum up
   * @param <R> the numeric type that is returned by the `mapper` function
   * @return the summed up results of the `mapper` function
   */
  @Override
  @Contract(pure = true)
  public  R sum(SerializableFunction mapper) throws Exception {
    var mapped = this.map(mapper);
    // the integer zero is used as the neutral starting value of the summation
    return mapped.reduce(() -> (R) (Integer) 0, NumberUtils::add);
  }

  /**
   * Counts the number of results.
   *
   * <p>Implemented as a sum in which every result contributes the value one.</p>
   *
   * @return the total count of features or modifications, summed up over all timestamps
   */
  @Override
  @Contract(pure = true)
  public Integer count() throws Exception {
    final Integer total = this.sum(unused -> 1);
    return total;
  }

  /**
   * Gets all unique values of the results.
   *
   * <p>For example, this can be used together with the OSMContributionView to get the total amount
   * of unique users editing specific feature types.</p>
   *
   * @return the set of distinct values
   */
  @Override
  @Contract(pure = true)
  public Set uniq() throws Exception {
    // delegates to the generic reduce, using the static uniq* helper functions of this class
    return this.reduce(
        MapReducer::uniqIdentitySupplier,
        MapReducer::uniqAccumulator,
        MapReducer::uniqCombiner
    );
  }

  /**
   * Gets the distinct values among everything the given mapper function returns.
   *
   * <p>This is a shorthand for `.map(mapper).uniq()`.</p>
   *
   * @param mapper function that returns some values
   * @param  the type that is returned by the `mapper` function
   * @return a set of distinct values returned by the `mapper` function
   */
  @Override
  @Contract(pure = true)
  public  Set uniq(SerializableFunction mapper) throws Exception {
    var mapped = this.map(mapper);
    return mapped.uniq();
  }

  /**
   * Counts all unique values of the results.
   *
   * <p>For example, this can be used together with the OSMContributionView to get the number of
   * unique users editing specific feature types.</p>
   *
   * @return the number of distinct values
   */
  @Override
  @Contract(pure = true)
  public Integer countUniq() throws Exception {
    var distinctValues = this.uniq();
    return distinctValues.size();
  }

  /**
   * Computes the average of the results.
   *
   * <p>The current data values need to be numeric (castable to "Number" type), otherwise a
   * runtime exception will be thrown.</p>
   *
   * @return the average of the current data
   * @throws UnsupportedOperationException if the data cannot be cast to numbers
   */
  @Override
  @Contract(pure = true)
  public Double average() throws Exception {
    var numbers = this.makeNumeric();
    // the values are already numeric, so they are averaged as they are
    return numbers.average(value -> value);
  }

  /**
   * Computes the average of the numbers which the given `mapper` function returns.
   *
   * <p>Implemented as a weighted average in which every value has the weight 1.0.</p>
   *
   * @param mapper function that returns the numbers to average
   * @param  the numeric type that is returned by the `mapper` function
   * @return the average of the numbers returned by the `mapper` function
   */
  @Override
  @Contract(pure = true)
  public  Double average(SerializableFunction mapper) throws Exception {
    return this.weightedAverage(item -> {
      final var value = mapper.apply(item);
      return new WeightedValue(value, 1.0);
    });
  }

  /**
   * Computes the weighted average of the values which the `mapper` function returns.
   *
   * <p>The mapper must return an object of the type `WeightedValue` which contains a numeric value
   * associated with a (floating point) weight.</p>
   *
   * @param mapper function that gets called for each entity snapshot or modification, needs to
   *        return the value and weight combination of numbers to average
   * @return the weighted average of the numbers returned by the `mapper` function
   */
  @Override
  @Contract(pure = true)
  public Double weightedAverage(SerializableFunction mapper) throws Exception {
    var weightedValues = this.map(mapper);
    MutableWeightedDouble totals = weightedValues.reduce(
        MutableWeightedDouble::identitySupplier,
        MutableWeightedDouble::accumulator,
        MutableWeightedDouble::combiner
    );
    // divide the reduced `num` total by the reduced `weight` total
    var numerator = totals.num;
    var denominator = totals.weight;
    return numerator / denominator;
  }

  /**
   * Returns an estimate of the median of the results, i.e. the estimated 0.5 quantile.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @return estimated median
   */
  @Override
  @Contract(pure = true)
  public Double estimatedMedian() throws Exception {
    final double medianQuantile = 0.5;
    return this.estimatedQuantile(medianQuantile);
  }

  /**
   * Returns an estimate of the median (the 0.5 quantile) of the results after applying the given
   * map function.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param mapper function that returns the numbers to generate the median for
   * @return estimated median
   */
  @Override
  @Contract(pure = true)
  public  Double estimatedMedian(SerializableFunction mapper)
      throws Exception {
    final double medianQuantile = 0.5;
    return this.estimatedQuantile(mapper, medianQuantile);
  }

  /**
   * Returns an estimate of a requested quantile of the results.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param q the desired quantile to calculate (as a number between 0 and 1)
   * @return estimated quantile boundary
   */
  @Override
  @Contract(pure = true)
  public Double estimatedQuantile(double q) throws Exception {
    var numbers = this.makeNumeric();
    // the values are already numeric, so they are used as they are
    return numbers.estimatedQuantile(value -> value, q);
  }

  /**
   * Returns an estimate of a requested quantile of the results after applying the given map
   * function.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param mapper function that returns the numbers to generate the quantile for
   * @param q the desired quantile to calculate (as a number between 0 and 1)
   * @return estimated quantile boundary
   */
  @Override
  @Contract(pure = true)
  public  Double estimatedQuantile(SerializableFunction mapper, double q)
      throws Exception {
    final DoubleUnaryOperator quantileFunction = this.estimatedQuantiles(mapper);
    return quantileFunction.applyAsDouble(q);
  }

  /**
   * Returns an estimate of several quantiles of the results.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
   * @return estimated quantile boundaries
   */
  @Override
  @Contract(pure = true)
  public List estimatedQuantiles(Iterable q) throws Exception {
    var numbers = this.makeNumeric();
    // the values are already numeric, so they are used as they are
    return numbers.estimatedQuantiles(value -> value, q);
  }

  /**
   * Returns an estimate of several quantiles of the results after applying the given map
   * function.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param mapper function that returns the numbers to generate the quantiles for
   * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
   * @return estimated quantile boundaries
   */
  @Override
  @Contract(pure = true)
  public  List estimatedQuantiles(
      SerializableFunction mapper,
      Iterable q
  ) throws Exception {
    // set up the stream of requested quantiles first, then obtain the quantile function once
    var requestedQuantiles = Streams.stream(q).mapToDouble(Double::doubleValue);
    final DoubleUnaryOperator quantileFunction = this.estimatedQuantiles(mapper);
    return requestedQuantiles
        .map(quantileFunction)
        .boxed()
        .collect(Collectors.toList());
  }

  /**
   * Returns a function which yields estimates of arbitrary quantiles of the results.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @return a function that computes estimated quantile boundaries
   */
  @Override
  @Contract(pure = true)
  public DoubleUnaryOperator estimatedQuantiles() throws Exception {
    var numbers = this.makeNumeric();
    // the values are already numeric, so they are used as they are
    return numbers.estimatedQuantiles(value -> value);
  }

  /**
   * Returns a function which yields estimates of arbitrary quantiles of the results after
   * applying the given map function.
   *
   * <p>
   * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
   * </p>
   *
   * @param mapper function that returns the numbers to generate the quantiles for
   * @return a function that computes estimated quantile boundaries
   */
  @Override
  @Contract(pure = true)
  public  DoubleUnaryOperator estimatedQuantiles(
      SerializableFunction mapper
  ) throws Exception {
    // the digest is computed once; the returned function only queries it
    final TDigest resultDigest = this.digest(mapper);
    return resultDigest::quantile;
  }

  /**
   * Generates the t-digest of the complete result set, after applying the given map function.
   *
   * <p>See:
   * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf</p>
   *
   * @param mapper function that is applied to every result before it is reduced into the digest
   * @return the t-digest produced by reducing all mapped results with the `TdigestReducer` helpers
   */
  @Contract(pure = true)
  private  TDigest digest(SerializableFunction mapper) throws Exception {
    return this.map(mapper).reduce(
        TdigestReducer::identitySupplier,
        TdigestReducer::accumulator,
        TdigestReducer::combiner
    );
  }

  // -----------------------------------------------------------------------------------------------
  // "Iterator" like helpers (stream, collect)
  // -----------------------------------------------------------------------------------------------

  /**
   * Iterates over each entity snapshot or contribution, and performs a single `action` on each one
   * of them.
   *
   * <p>This method can be handy for testing purposes. But note that since the `action` doesn't
   * produce a return value, it must facilitate its own way of producing output.</p>
   *
   * <p>If you'd like to use such a "forEach" in a non-test use case, use `.stream().forEach()`
   * instead.</p>
   *
   * @param action function that gets called for each transformed data entry
   * @throws Exception if during the underlying map-reduce operation an exception happens
   * @deprecated only for testing purposes, use `.stream().forEach()` instead
   */
  @Deprecated
  @SuppressWarnings("ResultOfMethodCallIgnored")
  public void forEach(SerializableConsumer action) throws Exception {
    // The placeholder is captured by the lambdas below, whose target types are Serializable*
    // functional interfaces. It therefore has to be serializable itself: a plain `new Object()`
    // would make any attempt to serialize these lambdas fail.
    final Object ignored = Boolean.TRUE;
    // run the action as a side effect of a map step; the reduced value is meaningless and dropped
    this.map(data -> {
      action.accept(data);
      return ignored;
    }).reduce(() -> ignored, (ignored2, ignored3) -> ignored);
  }

  /**
   * Collects all results into a List.
   *
   * @return a list with all results returned by the `mapper` function
   * @throws Exception if during the underlying reduce operation an exception happens
   */
  @Override
  @Contract(pure = true)
  public List collect() throws Exception {
    // delegates to the generic reduce, using the static collect* helper functions of this class
    return this.reduce(
        MapReducer::collectIdentitySupplier,
        MapReducer::collectAccumulator,
        MapReducer::collectCombiner
    );
  }

  /**
   * Returns all results as a Stream.
   *
   * <p>If the used oshdb database backend doesn't implement the stream operation directly, this
   * will fall back to executing `.collect().stream()` instead, which buffers all results in
   * memory first before returning them as a stream.</p>
   *
   * @return a stream with all results returned by the `mapper` function
   */
  @Override
  @Contract(pure = true)
  public Stream stream() throws Exception {
    try {
      return this.streamInternal();
    } catch (OSHDBNotImplementedException notImplemented) {
      final String fallbackNotice =
          "stream not directly supported by chosen backend, falling back to "
              + ".collect().stream()";
      LOG.info(fallbackNotice);
      // buffer everything in a list, then stream from that list
      var bufferedResults = this.collect();
      return bufferedResults.stream();
    }
  }

  /**
   * Dispatches the stream operation to the matching backend routine.
   *
   * <p>The routine is selected by the grouping mode, by the data view of the query (contributions
   * or entity snapshots) and by whether all mapping functions pass the `canUseFastPath` check.</p>
   *
   * @return the stream returned by the selected backend routine
   * @throws UnsupportedOperationException if the data view or the grouping mode is not handled
   * @throws Exception if the selected backend routine throws
   */
  @Contract(pure = true)
  private Stream streamInternal() throws Exception {
    checkTimeout();
    switch (this.grouping) {
      case NONE:
        if (this.mappers.stream().allMatch(this::canUseFastPath)) {
          // fast path: hand the single-result mapper directly to the per-item backend routine
          final SerializableFunction> mapper = this.getMapper();
          if (isOSMContributionViewQuery()) {
            @SuppressWarnings("Convert2MethodRef")
            // having just `mapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
            final SerializableFunction> contributionMapper =
                data -> mapper.apply(data);
            return this.mapStreamCellsOSMContribution(contributionMapper);
          } else if (isOSMEntitySnapshotViewQuery()) {
            @SuppressWarnings("Convert2MethodRef")
            // having just `mapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
            final SerializableFunction> snapshotMapper =
                data -> mapper.apply(data);
            return this.mapStreamCellsOSMEntitySnapshot(snapshotMapper);
          } else {
            throw new UnsupportedOperationException(String.format(
                UNIMPLEMENTED_DATA_VIEW, this.viewClass));
          }
        } else {
          // no fast path: use the grouped-by-id backend routine, applying the flat mapper to
          // every item of the input list and concatenating all results into one output list
          final SerializableFunction> flatMapper = this.getFlatMapper();
          if (isOSMContributionViewQuery()) {
            return this.flatMapStreamCellsOSMContributionGroupedById(
                (List inputList) -> {
                  List outputList = new LinkedList<>();
                  inputList.stream()
                      .map((SerializableFunction>) flatMapper::apply)
                      .forEach(data -> Iterables.addAll(outputList, data));
                  return outputList;
                });
          } else if (isOSMEntitySnapshotViewQuery()) {
            return this.flatMapStreamCellsOSMEntitySnapshotGroupedById(
                (List inputList) -> {
                  List outputList = new LinkedList<>();
                  inputList.stream()
                      .map((SerializableFunction>) flatMapper::apply)
                      .forEach(data -> Iterables.addAll(outputList, data));
                  return outputList;
                });
          } else {
            throw new UnsupportedOperationException(String.format(
                UNIMPLEMENTED_DATA_VIEW, this.viewClass));
          }
        }
        // (no fall-through: every branch above either returns or throws)
      case BY_ID:
        // the grouped-by-id backend routines expect a flat mapper; on the fast path the optional
        // single result is wrapped into a list holding zero or one elements
        final SerializableFunction> flatMapper;
        if (this.mappers.stream().allMatch(this::canUseFastPath)) {
          final SerializableFunction> mapper = this.getMapper();
          flatMapper = data -> mapper.apply(data)
              .map(Collections::singletonList)
              .orElse(Collections.emptyList());
        } else {
          flatMapper = this.getFlatMapper();
        }
        if (isOSMContributionViewQuery()) {
          @SuppressWarnings("Convert2MethodRef")
          // having just `flatMapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
          final SerializableFunction, Iterable> contributionFlatMapper =
              data -> flatMapper.apply(data);
          return this.flatMapStreamCellsOSMContributionGroupedById(contributionFlatMapper);
        } else if (isOSMEntitySnapshotViewQuery()) {
          @SuppressWarnings("Convert2MethodRef")
          // having just `flatMapper::apply` here is problematic, see https://github.com/GIScience/oshdb/pull/37
          final SerializableFunction, Iterable> snapshotFlatMapper =
              data -> flatMapper.apply(data);
          return this.flatMapStreamCellsOSMEntitySnapshotGroupedById(snapshotFlatMapper);
        } else {
          throw new UnsupportedOperationException(String.format(
              UNIMPLEMENTED_DATA_VIEW, this.viewClass));
        }
      default:
        throw new UnsupportedOperationException(String.format(
            UNSUPPORTED_GROUPING, this.grouping));
    }
  }

  // -----------------------------------------------------------------------------------------------
  // Generic map-stream functions (internal).
  // These need to be implemented by the actual db/processing backend!
  // -----------------------------------------------------------------------------------------------

  /**
   * Backend hook: streams the results of the `mapper` function for an `OSMContribution` query.
   *
   * <p>Called by the stream operation when no grouping is active and all mapping functions can
   * use the fast path.</p>
   *
   * @param mapper the function whose results are streamed
   * @return a stream of the results
   * @throws Exception if the backend fails while producing the stream
   */
  protected abstract Stream mapStreamCellsOSMContribution(
      SerializableFunction> mapper) throws Exception;

  /**
   * Backend hook: "flat" variant of the stream operation for an `OSMContribution` query, in which
   * the `mapper` function receives a list of inputs and may return any number of results.
   *
   * <p>Called by the stream operation in the by-id grouping mode, and in the ungrouped mode when
   * not all mapping functions can use the fast path.</p>
   *
   * @param mapper the function that gets a list of inputs and returns an iterable of results
   * @return a stream of the results
   * @throws Exception if the backend fails while producing the stream
   */
  protected abstract Stream flatMapStreamCellsOSMContributionGroupedById(
      SerializableFunction, Iterable> mapper) throws Exception;

  /**
   * Backend hook: streams the results of the `mapper` function for an `OSMEntitySnapshot` query.
   *
   * <p>Called by the stream operation when no grouping is active and all mapping functions can
   * use the fast path.</p>
   *
   * @param mapper the function whose results are streamed
   * @return a stream of the results
   * @throws Exception if the backend fails while producing the stream
   */
  protected abstract Stream mapStreamCellsOSMEntitySnapshot(
      SerializableFunction> mapper) throws Exception;

  /**
   * Backend hook: "flat" variant of the stream operation for an `OSMEntitySnapshot` query, in
   * which the `mapper` function receives a list of inputs and may return any number of results.
   *
   * <p>Called by the stream operation in the by-id grouping mode, and in the ungrouped mode when
   * not all mapping functions can use the fast path.</p>
   *
   * @param mapper the function that gets a list of inputs and returns an iterable of results
   * @return a stream of the results
   * @throws Exception if the backend fails while producing the stream
   */
  protected abstract Stream flatMapStreamCellsOSMEntitySnapshotGroupedById(
      SerializableFunction, Iterable> mapper) throws Exception;

  // -----------------------------------------------------------------------------------------------
  // Generic map-reduce functions (internal).
  // These need to be implemented by the actual db/processing backend!
  // -----------------------------------------------------------------------------------------------

  /**
   * Generic map-reduce used by the `OSMContributionView`.
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   * <ul>
   * <li>the accumulator and combiner functions need to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the combiner
   * function: `combiner(identitySupplier(),x)` must be equal to `x`,</li>
   * <li>the combiner function must be compatible with the accumulator function: `combiner(u,
   * accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * reduce(identity,accumulator,combiner)
   * interface.
   * </p>
   *
   * @param mapper a function that's called for each `OSMContribution`
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type &lt;R&gt;)
   *        and an accumulation value (type &lt;S&gt;, e.g. the result of `identitySupplier()`) and
   *        returns the "sum" of the two; contrary to `combiner`, this function is allowed to alter
   *        (mutate) the state of the accumulation value (e.g. directly adding new values to an
   *        existing Set object)
   * @param combiner a function that calculates the "sum" of two &lt;S&gt; values; this function
   *        must be pure (have no side effects), and is not allowed to alter the state of the two
   *        input objects it gets!
   * @param <R> the data type returned by the `mapper` function
   * @param <S> the data type used to contain the "reduced" (intermediate and final) results
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `combiner` function, after all `mapper` results have been aggregated (in the
   *         `accumulator` and `combiner` steps)
   * @throws Exception if during the reducing operation an exception happens
   */
  protected abstract  S mapReduceCellsOSMContribution(
      SerializableFunction> mapper,
      SerializableSupplier identitySupplier,
      SerializableBiFunction accumulator,
      SerializableBinaryOperator combiner
  ) throws Exception;

  /**
   * Generic "flat" version of the map-reduce used by the `OSMContributionView`, with by-osm-id
   * grouped input to the `mapper` function.
   *
   * <p>
   * Contrary to the "normal" map-reduce, the "flat" version adds the possibility to return any
   * number of results in the `mapper` function. Additionally, this interface provides the `mapper`
   * function with a list of all `OSMContribution`s of a particular OSM entity. This is used to do
   * more complex analyses that require the full edit history of the respective OSM entities as
   * input.
   * </p>
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   * <ul>
   * <li>the accumulator and combiner functions need to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the combiner
   * function: `combiner(identitySupplier(),x)` must be equal to `x`,</li>
   * <li>the combiner function must be compatible with the accumulator function: `combiner(u,
   * accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * {@link Stream#reduce(Object, java.util.function.BiFunction,
   * java.util.function.BinaryOperator) reduce(identity,accumulator,combiner)}
   * interface.
   * </p>
   *
   * @param mapper a function that's called for all `OSMContribution`s of a particular OSM entity;
   *        returns a list of results (which can have any number of entries).
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type
   *        {@code <R>}) and an accumulation value (type {@code <S>}, e.g. the result of
   *        `identitySupplier()`) and returns the "sum" of the two; contrary to `combiner`, this
   *        function is allowed to alter (mutate) the state of the accumulation value (e.g.
   *        directly adding new values to an existing Set object)
   * @param combiner a function that calculates the "sum" of two {@code <S>} values; this
   *        function must be pure (have no side effects), and is not allowed to alter the state of
   *        the two input objects it gets!
   * @param <R> the data type returned by the `mapper` function
   * @param <S> the data type used to contain the "reduced" (intermediate and final) results
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `combiner` function, after all `mapper` results have been aggregated (in the
   *         `accumulator` and `combiner` steps)
   * @throws Exception declared by this abstract method; which exceptions are actually raised
   *         depends on the implementing subclass
   */
  protected abstract  S flatMapReduceCellsOSMContributionGroupedById(
      SerializableFunction, Iterable> mapper,
      SerializableSupplier identitySupplier,
      SerializableBiFunction accumulator,
      SerializableBinaryOperator combiner
  ) throws Exception;

  /**
   * Generic map-reduce used by the `OSMEntitySnapshotView`.
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   * <ul>
   * <li>the accumulator and combiner functions need to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the combiner
   * function: `combiner(identitySupplier(),x)` must be equal to `x`,</li>
   * <li>the combiner function must be compatible with the accumulator function: `combiner(u,
   * accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * {@link Stream#reduce(Object, java.util.function.BiFunction,
   * java.util.function.BinaryOperator) reduce(identity,accumulator,combiner)}
   * interface.
   * </p>
   *
   * @param mapper a function that's called for each `OSMEntitySnapshot`
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type
   *        {@code <R>}) and an accumulation value (type {@code <S>}, e.g. the result of
   *        `identitySupplier()`) and returns the "sum" of the two; contrary to `combiner`, this
   *        function is allowed to alter (mutate) the state of the accumulation value (e.g.
   *        directly adding new values to an existing Set object)
   * @param combiner a function that calculates the "sum" of two {@code <S>} values; this
   *        function must be pure (have no side effects), and is not allowed to alter the state of
   *        the two input objects it gets!
   * @param <R> the data type returned by the `mapper` function
   * @param <S> the data type used to contain the "reduced" (intermediate and final) results
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `combiner` function, after all `mapper` results have been aggregated (in the
   *         `accumulator` and `combiner` steps)
   * @throws Exception declared by this abstract method; which exceptions are actually raised
   *         depends on the implementing subclass
   */
  protected abstract  S mapReduceCellsOSMEntitySnapshot(
      SerializableFunction> mapper,
      SerializableSupplier identitySupplier,
      SerializableBiFunction accumulator,
      SerializableBinaryOperator combiner
  ) throws Exception;

  /**
   * Generic "flat" version of the map-reduce used by the `OSMEntitySnapshotView`, with by-osm-id
   * grouped input to the `mapper` function.
   *
   * <p>
   * Contrary to the "normal" map-reduce, the "flat" version adds the possibility to return any
   * number of results in the `mapper` function. Additionally, this interface provides the `mapper`
   * function with a list of all `OSMEntitySnapshot`s of a particular OSM entity. This is used to
   * do more complex analyses that require the full list of snapshots of the respective OSM
   * entities as input.
   * </p>
   *
   * <p>
   * The combination of the used types and identity/reducer functions must make "mathematical"
   * sense:
   * </p>
   * <ul>
   * <li>the accumulator and combiner functions need to be associative,</li>
   * <li>values generated by the identitySupplier factory must be an identity for the combiner
   * function: `combiner(identitySupplier(),x)` must be equal to `x`,</li>
   * <li>the combiner function must be compatible with the accumulator function: `combiner(u,
   * accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`</li>
   * </ul>
   *
   * <p>
   * Functionally, this interface is similar to Java11 Stream's
   * {@link Stream#reduce(Object, java.util.function.BiFunction,
   * java.util.function.BinaryOperator) reduce(identity,accumulator,combiner)}
   * interface.
   * </p>
   *
   * @param mapper a function that's called for all `OSMEntitySnapshot`s of a particular OSM entity;
   *        returns a list of results (which can have any number of entries)
   * @param identitySupplier a factory function that returns a new starting value to reduce results
   *        into (e.g. when summing values, one needs to start at zero)
   * @param accumulator a function that takes a result from the `mapper` function (type
   *        {@code <R>}) and an accumulation value (type {@code <S>}, e.g. the result of
   *        `identitySupplier()`) and returns the "sum" of the two; contrary to `combiner`, this
   *        function is allowed to alter (mutate) the state of the accumulation value (e.g.
   *        directly adding new values to an existing Set object)
   * @param combiner a function that calculates the "sum" of two {@code <S>} values; this
   *        function must be pure (have no side effects), and is not allowed to alter the state of
   *        the two input objects it gets!
   * @param <R> the data type returned by the `mapper` function
   * @param <S> the data type used to contain the "reduced" (intermediate and final) results
   * @return the result of the map-reduce operation, the final result of the last call to the
   *         `combiner` function, after all `mapper` results have been aggregated (in the
   *         `accumulator` and `combiner` steps)
   * @throws Exception declared by this abstract method; which exceptions are actually raised
   *         depends on the implementing subclass
   */
  protected abstract  S flatMapReduceCellsOSMEntitySnapshotGroupedById(
      SerializableFunction, Iterable> mapper,
      SerializableSupplier identitySupplier,
      SerializableBiFunction accumulator,
      SerializableBinaryOperator combiner
  ) throws Exception;

  // -----------------------------------------------------------------------------------------------
  // Some helper methods for internal use in the mapReduce functions
  // -----------------------------------------------------------------------------------------------

  /**
   * Tells whether the view class of this map reducer is {@code OSMContribution} or a subtype of
   * it.
   *
   * @return true if {@code OSMContribution} is assignable from the view class
   */
  protected boolean isOSMContributionViewQuery() {
    final boolean contributionView = OSMContribution.class.isAssignableFrom(this.viewClass);
    return contributionView;
  }

  /**
   * Tells whether the view class of this map reducer is {@code OSMEntitySnapshot} or a subtype
   * of it.
   *
   * @return true if {@code OSMEntitySnapshot} is assignable from the view class
   */
  protected boolean isOSMEntitySnapshotViewQuery() {
    final boolean snapshotView = OSMEntitySnapshot.class.isAssignableFrom(this.viewClass);
    return snapshotView;
  }

  /**
   * Returns the tag interpreter of this map reducer. If none has been stored yet, a
   * {@code DefaultTagInterpreter} is created from the oshdb's tag translator, remembered in the
   * {@code tagInterpreter} field and returned.
   *
   * @return the stored (or freshly created) tag interpreter
   */
  protected TagInterpreter getTagInterpreter() throws ParseException, IOException {
    if (this.tagInterpreter != null) {
      // already set: hand out the existing instance
      return this.tagInterpreter;
    }
    this.tagInterpreter = new DefaultTagInterpreter(oshdb.getTagTranslator());
    return this.tagInterpreter;
  }

  /**
   * Combines all registered oshEntity pre-filters into a single filter.
   *
   * @return a filter accepting an entity only if every registered pre-filter accepts it; if no
   *         pre-filter is registered, a filter which accepts everything
   */
  protected OSHEntityFilter getPreFilter() {
    if (this.preFilters.isEmpty()) {
      return entity -> true;
    }
    return entity -> {
      // evaluated in registration order, stopping at the first pre-filter which rejects
      for (SerializablePredicate preFilter : this.preFilters) {
        if (!preFilter.test(entity)) {
          return false;
        }
      }
      return true;
    };
  }

  /**
   * Combines all registered osmEntity filters into a single filter.
   *
   * @return a filter accepting an entity only if every registered filter accepts it; if no
   *         filter is registered, a filter which accepts everything
   */
  protected OSMEntityFilter getFilter() {
    if (this.filters.isEmpty()) {
      return entity -> true;
    }
    return entity -> {
      // evaluated in registration order, stopping at the first filter which rejects
      for (SerializablePredicate entityFilter : this.filters) {
        if (!entityFilter.test(entity)) {
          return false;
        }
      }
      return true;
    };
  }

  /**
   * Collects the ids of all grid cells covered by the bounding box of the current area of
   * interest.
   *
   * @return the cell id ranges for the bounding box filter; an empty iterable (after logging a
   *         warning) if the bounding box is not set, or if its minimum longitude/latitude is
   *         not smaller than the respective maximum
   */
  protected Iterable getCellIdRanges() {
    XYGridTree gridTree = new XYGridTree(OSHDB.MAXZOOM);
    final boolean unsetOrEmpty = this.bboxFilter == null
        || this.bboxFilter.getMinLongitude() >= this.bboxFilter.getMaxLongitude()
        || this.bboxFilter.getMinLatitude() >= this.bboxFilter.getMaxLatitude();
    if (!unsetOrEmpty) {
      return gridTree.bbox2CellIdRanges(this.bboxFilter, true);
    }
    // nothing to query: bbox is not set or empty
    LOG.warn("area of interest not set or empty");
    return Collections.emptyList();
  }

  /**
   * Returns the polygon filter cast to the caller's type {@code P}.
   *
   * <p>
   * This is a hack which allows callers to hold the value in a variable that is both a Geometry
   * and Polygonal (i.e. a Polygon or MultiPolygon), as required in further processing steps.
   * </p>
   *
   * @return the {@code polyFilter} field, cast (unchecked) to {@code P}
   */
  @SuppressWarnings("unchecked") // all setters only accept Polygonal geometries
  protected  P getPolyFilter() {
    P polygonalFilter = (P) this.polyFilter;
    return polygonalFilter;
  }

  // builds one function which runs all applied `map` (and filter) steps one after another
  private SerializableFunction> getMapper() {
    // todo: maybe we can somehow optimize this?? at least for special cases like
    // this.mappers.size() == 1
    return (SerializableFunction>) data -> {
      // the actual intermediate types are unknown here, so plain Objects are passed along
      Object current = data;
      for (MapFunction step : this.mappers) {
        if (step instanceof FilterFunction predicate) {
          if (predicate.test(current)) {
            continue;
          }
          // a filter step rejected the value: yield an empty result
          return Optional.empty();
        }
        if (step.isFlatMapper()) {
          // flat map steps cannot be handled by this (non-flat) mapper
          assert false : "flatMap callback requested in getMapper";
          throw new UnsupportedOperationException("cannot flat map this");
        }
        current = step.apply(current, data);
      }
      // once all steps have been applied, the value is of the result type X
      @SuppressWarnings("unchecked")
      X mapped = (X) current;
      return Optional.of(mapped);
    };
  }

  // concatenates all applied `flatMap` and `map` functions
  private SerializableFunction> getFlatMapper() {
    // todo: maybe we can somehow optimize this?? at least for special cases like
    // this.mappers.size() == 1
    return (SerializableFunction>) data -> {
      // working with raw objects since we don't know the actual intermediate types ¯\_(ツ)_/¯
      List