org.heigit.bigspatialdata.oshdb.api.mapreducer.MapReducer Maven / Gradle / Ivy
Show all versions of oshdb-api Show documentation
package org.heigit.bigspatialdata.oshdb.api.mapreducer;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.collect.Streams;
import com.tdunning.math.stats.TDigest;
import java.io.IOException;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.DoubleUnaryOperator;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.heigit.bigspatialdata.oshdb.OSHDB;
import org.heigit.bigspatialdata.oshdb.api.db.OSHDBDatabase;
import org.heigit.bigspatialdata.oshdb.api.db.OSHDBJdbc;
import org.heigit.bigspatialdata.oshdb.api.generic.NumberUtils;
import org.heigit.bigspatialdata.oshdb.api.generic.WeightedValue;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableBiFunction;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableBinaryOperator;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableConsumer;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableFunction;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializablePredicate;
import org.heigit.bigspatialdata.oshdb.api.generic.function.SerializableSupplier;
import org.heigit.bigspatialdata.oshdb.api.object.OSHDBMapReducible;
import org.heigit.bigspatialdata.oshdb.api.object.OSMContribution;
import org.heigit.bigspatialdata.oshdb.api.object.OSMEntitySnapshot;
import org.heigit.bigspatialdata.oshdb.index.XYGridTree;
import org.heigit.bigspatialdata.oshdb.index.XYGridTree.CellIdRange;
import org.heigit.bigspatialdata.oshdb.osh.OSHEntity;
import org.heigit.bigspatialdata.oshdb.osm.OSMEntity;
import org.heigit.bigspatialdata.oshdb.osm.OSMType;
import org.heigit.bigspatialdata.oshdb.util.OSHDBBoundingBox;
import org.heigit.bigspatialdata.oshdb.util.OSHDBTag;
import org.heigit.bigspatialdata.oshdb.util.OSHDBTagKey;
import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp;
import org.heigit.bigspatialdata.oshdb.util.celliterator.CellIterator;
import org.heigit.bigspatialdata.oshdb.util.celliterator.ContributionType;
import org.heigit.bigspatialdata.oshdb.util.exceptions.OSHDBInvalidTimestampException;
import org.heigit.bigspatialdata.oshdb.util.exceptions.OSHDBKeytablesNotFoundException;
import org.heigit.bigspatialdata.oshdb.util.geometry.Geo;
import org.heigit.bigspatialdata.oshdb.util.geometry.OSHDBGeometryBuilder;
import org.heigit.bigspatialdata.oshdb.util.taginterpreter.DefaultTagInterpreter;
import org.heigit.bigspatialdata.oshdb.util.taginterpreter.TagInterpreter;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTag;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagInterface;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagKey;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.TagTranslator;
import org.heigit.bigspatialdata.oshdb.util.time.IsoDateTimeParser;
import org.heigit.bigspatialdata.oshdb.util.time.OSHDBTimestampList;
import org.heigit.bigspatialdata.oshdb.util.time.OSHDBTimestamps;
import org.heigit.bigspatialdata.oshdb.util.time.TimestampFormatter;
import org.heigit.ohsome.filter.AndOperator;
import org.heigit.ohsome.filter.Filter;
import org.heigit.ohsome.filter.FilterExpression;
import org.heigit.ohsome.filter.FilterParser;
import org.heigit.ohsome.filter.GeometryTypeFilter;
import org.heigit.ohsome.filter.TagFilterEquals;
import org.heigit.ohsome.filter.TagFilterEqualsAny;
import org.heigit.ohsome.filter.TypeFilter;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.json.simple.parser.ParseException;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.Polygonal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Main class of oshdb's "functional programming" API.
*
* It accepts a list of filters and transformation `map` functions, and produces a result when
* calling the `reduce` method (or one of its shorthand versions like `sum`, `count`, etc.).
*
*
* You can set a list of filters that are applied on the raw OSM data, for example you can filter:
*
*
* - geometrically by an area of interest (bbox or polygon)
* - by osm tags (key only or key/value)
* - by OSM type
* - custom filter callback
*
*
* Depending on the used data "view", the MapReducer either produces "snapshots" or evaluates
* all modifications ("contributions") of the matching raw OSM data.
*
* These data can then be transformed arbitrarily by user defined `map` functions (which take one
* of these entity snapshots or modifications as input and produce an arbitrary output) or
* `flatMap` functions (which can return an arbitrary number of results per entity
* snapshot/contribution). It is possible to chain together any number of transformation functions.
*
* Finally, one can either use one of the pre-defined result-generating functions (e.g. `sum`,
* `count`, `average`, `uniq`), or specify a custom `reduce` procedure.
*
* If one wants to get results that are aggregated by timestamp (or some other index), one can
* use the `aggregateByTimestamp` or `aggregateBy` functionality that automatically handles the
* grouping of the output data.
*
* For more complex analyses, it is also possible to enable the grouping of the input data by
* the respective OSM ID. This can be used to look at the whole history of entities at once.
*
* @param <X> the type that is returned by the currently set mapper functions. The next added
* mapper function will be called with a parameter of this type as input
*/
public abstract class MapReducer implements
MapReducerSettings>, Mappable, MapReducerAggregations,
MapAggregatable, X>, X>, Serializable {
// Logger shared by all instances of this class.
private static final Logger LOG = LoggerFactory.getLogger(MapReducer.class);
// The main database this MapReducer works on; assigned in the constructors.
protected OSHDBDatabase oshdb;
// Database used to resolve strings (osm tags, roles) into internal identifiers, see
// keytables(OSHDBJdbc). Declared transient, i.e. excluded from Java serialization.
protected transient OSHDBJdbc keytables;
// Query timeout, unset (null) by default.
// NOTE(review): unit and the place where this is assigned are not visible in this part of the file.
protected Long timeout = null;
// internal state
// The kind of objects this MapReducer operates on; compared against OSMEntitySnapshot.class and
// OSMContribution.class further below.
// NOTE(review): the generic wildcard of this declaration looks truncated (presumably
// Class<? extends OSHDBMapReducible>) — confirm against the original file.
Class extends OSHDBMapReducible> forClass;
/** Ways in which the input data can be grouped before it reaches the transformation functions. */
enum Grouping {
  /** No grouping: this is the initial state of a MapReducer. */
  NONE,
  /** Grouping by entity id, as activated by `groupByEntity()`. */
  BY_ID
}
// Currently active grouping mode; starts without any grouping.
Grouping grouping = Grouping.NONE;
/**
 * Tells whether the current backend can be canceled (e.g. in a query timeout).
 *
 * @return always {@code false} in this base implementation
 */
public boolean isCancelable() {
  return false;
}
// utility objects
// Translates between OSM tag strings and OSHDB tag ids; transient and initially unset.
private transient TagTranslator tagTranslator = null;
// Tag interpreter used in the analysis, see tagInterpreter(TagInterpreter); initially unset.
private TagInterpreter tagInterpreter = null;
// settings and filters
// Default timestamps: monthly steps starting at 2008-01-01 up to the date on which this object
// is created.
protected OSHDBTimestampList tstamps = new OSHDBTimestamps(
"2008-01-01",
TimestampFormatter.getInstance().date(new Date()),
OSHDBTimestamps.Interval.MONTHLY
);
// Default area of interest: the bounding box (-180, -90, 180, 90).
protected OSHDBBoundingBox bboxFilter = new OSHDBBoundingBox(-180, -90, 180, 90);
// Optional polygonal area of interest; null as long as only bounding boxes were set.
private Geometry polyFilter = null;
// OSM types to be considered; all three types by default.
protected EnumSet typeFilter = EnumSet.of(OSMType.NODE, OSMType.WAY, OSMType.RELATION);
// Filters that receive an "oshEntity" (see the osmTag methods below).
// NOTE(review): the element type of this list looks truncated in this listing.
private final List> preFilters = new ArrayList<>();
// Filters that receive an "osmEntity" (see osmEntityFilter and the osmTag methods below).
// NOTE(review): the element type of this list looks truncated in this listing.
private final List> filters = new ArrayList<>();
// Chain of map/flatMap functions (MapFunction objects), in the order in which they were added.
final List mappers = new LinkedList<>();
// basic constructor
/**
* Creates a MapReducer with default settings for the given database.
*
* NOTE(review): the generic wildcard of the second parameter looks truncated in this listing
* (presumably {@code Class<? extends OSHDBMapReducible>}) — confirm against the original file.
*
* @param oshdb the main database to work on
* @param forClass the kind of objects (e.g. snapshots or contributions) this MapReducer produces
*/
protected MapReducer(OSHDBDatabase oshdb, Class extends OSHDBMapReducible> forClass) {
this.oshdb = oshdb;
this.forClass = forClass;
}
// copy constructor
/**
* Creates a new MapReducer which takes over the state of the given one.
*
* References to the databases, utility objects, timestamps and area filters are shared with the
* source object, while the type filter set and the filter/mapper lists are copied, so that adding
* entries to the new object does not alter the source object.
*
* NOTE(review): {@code timeout} is not taken over from the source object — confirm that this is
* intentional.
* NOTE(review): the generic wildcard of the parameter type looks truncated in this listing.
*
* @param obj the MapReducer whose state is copied
*/
protected MapReducer(MapReducer> obj) {
this.oshdb = obj.oshdb;
this.keytables = obj.keytables;
this.forClass = obj.forClass;
this.grouping = obj.grouping;
this.tagTranslator = obj.tagTranslator;
this.tagInterpreter = obj.tagInterpreter;
this.tstamps = obj.tstamps;
this.bboxFilter = obj.bboxFilter;
this.polyFilter = obj.polyFilter;
// the enum set is mutable: work on a separate copy
this.typeFilter = obj.typeFilter.clone();
// the list fields are final: fill this instance's own (fresh) lists with the source's entries
this.preFilters.addAll(obj.preFilters);
this.filters.addAll(obj.filters);
this.mappers.addAll(obj.mappers);
}
/**
* Creates a copy of this MapReducer.
*
* All "setting" and filtering methods of this class apply their changes to such a copy and
* return it, instead of modifying the object they are called on.
*
* @return a copy of this MapReducer (never null)
*/
@NotNull
protected abstract MapReducer copy();
// -----------------------------------------------------------------------------------------------
// "Setting" methods and associated internal helpers
// -----------------------------------------------------------------------------------------------
/**
 * Selects the keytables database which is used to translate strings (osm tags, roles) into the
 * identifiers used internally. When this method is never called, the main database (given when
 * this object was constructed) serves that purpose.
 *
 * <p>If the main database is an {@link OSHDBJdbc} different from the given one, and a
 * {@link TagTranslator} can be opened on its connection (i.e. it seems to ship its own
 * keytables), a warning is logged.</p>
 *
 * @param keytables the database to use for resolving strings into internal identifiers
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer keytables(OSHDBJdbc keytables) {
  final boolean mainDbIsJdbc = this.oshdb instanceof OSHDBJdbc;
  if (mainDbIsJdbc && keytables != this.oshdb) {
    Connection mainConnection = ((OSHDBJdbc) this.oshdb).getConnection();
    boolean keytablesMissing = false;
    try {
      // probe only: open a translator on the main database and close it right away
      TagTranslator probe = new TagTranslator(mainConnection);
      probe.close();
    } catch (OSHDBKeytablesNotFoundException expected) {
      // this is the expected path -> the oshdb doesn't have the key tables
      keytablesMissing = true;
    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
    if (!keytablesMissing) {
      LOG.warn("It looks like as if the current OSHDB comes with keytables included. "
          + "Usually this means that you should use this file's keytables "
          + "and should not set the keytables manually.");
    }
  }
  MapReducer modified = this.copy();
  modified.keytables = keytables;
  return modified;
}
/**
 * Replaces the tagInterpreter used in the analysis. The tagInterpreter decides internally which
 * geometry type an osm entity gets (an osm way, for instance, may turn into a LineString or a
 * Polygon, depending on its tags). Usually it is generated automatically; this method allows to
 * supply a custom one, e.g. when the DefaultTagInterpreter is not wanted.
 *
 * @param tagInterpreter the tagInterpreter object to use in the processing of osm entities
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@SuppressWarnings("unused")
@Contract(pure = true)
public MapReducer tagInterpreter(TagInterpreter tagInterpreter) {
  MapReducer modified = this.copy();
  modified.tagInterpreter = tagInterpreter;
  return modified;
}
// -----------------------------------------------------------------------------------------------
// Filtering methods
// -----------------------------------------------------------------------------------------------
/**
 * Restricts the area of interest to the given bounding box. Only objects inside or clipped by
 * this bbox will be passed on to the analysis' `mapper` function.
 *
 * <p>Without a previously set polygon, the new bbox is intersected with the current one.
 * Otherwise the polygon is clipped to the bbox and the bbox filter becomes the envelope of the
 * clipped polygon.</p>
 *
 * @param bboxFilter the bounding box to query the data in
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer areaOfInterest(@NotNull OSHDBBoundingBox bboxFilter) {
  MapReducer modified = this.copy();
  if (this.polyFilter != null) {
    modified.polyFilter = Geo.clip(modified.polyFilter, bboxFilter);
    modified.bboxFilter =
        OSHDBGeometryBuilder.boundingBoxOf(modified.polyFilter.getEnvelopeInternal());
    return modified;
  }
  modified.bboxFilter = OSHDBBoundingBox.intersect(bboxFilter, modified.bboxFilter);
  return modified;
}
/**
* Set the area of interest to the given polygon. Only objects inside or clipped by this polygon
* will be passed on to the analysis' `mapper` function.
*
* The given polygon is clipped to the area of interest that is already set (the current bounding
* box, or the current polygon if there is one). The bounding box filter is then set to the
* envelope of the resulting polygon.
*
* NOTE(review): the declaration of the type parameter {@code P} looks truncated in this listing.
*
* @param polygonFilter the polygon to query the data in
* @return a modified copy of this mapReducer (can be used to chain multiple commands together)
*/
@Contract(pure = true)
public MapReducer areaOfInterest(@NotNull P polygonFilter) {
MapReducer ret = this.copy();
if (this.polyFilter == null) {
// no polygon yet: restrict the new polygon to the current bounding box
ret.polyFilter = Geo.clip(polygonFilter, ret.bboxFilter);
} else {
// a polygon is already set: restrict the new polygon to it
ret.polyFilter = Geo.clip(polygonFilter, ret.getPolyFilter());
}
// keep the bounding box filter in sync with the (clipped) polygon
ret.bboxFilter = OSHDBGeometryBuilder.boundingBoxOf(ret.polyFilter.getEnvelopeInternal());
return ret;
}
/**
 * Defines the timestamps for which the analysis is performed.
 *
 * <p>The exact meaning depends on the *View*:</p>
 * <ul>
 * <li>For the OSMEntitySnapshotView, these are the time slices at which the "snapshots" are
 * taken.</li>
 * <li>For the OSMContributionView, they define the time interval in which to look for osm
 * contributions (only the first and last timestamp of this list are contributing).</li>
 * </ul>
 *
 * <p>Additionally, these timestamps are used in the `aggregateByTimestamp` functionality.</p>
 *
 * @param tstamps an object (implementing the OSHDBTimestampList interface) which provides the
 *        timestamps to do the analysis for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer timestamps(OSHDBTimestampList tstamps) {
  MapReducer modified = this.copy();
  modified.tstamps = tstamps;
  return modified;
}
/**
 * Defines the timestamps of the analysis as a regular sequence between a start and an end date.
 *
 * <p>See {@link #timestamps(OSHDBTimestampList)} for further information.</p>
 *
 * @param isoDateStart an ISO 8601 date string representing the start date of the analysis
 * @param isoDateEnd an ISO 8601 date string representing the end date of the analysis
 * @param interval the interval between the timestamps to be used in the analysis
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer timestamps(
    String isoDateStart, String isoDateEnd, OSHDBTimestamps.Interval interval
) {
  OSHDBTimestampList regularTimestamps = new OSHDBTimestamps(isoDateStart, isoDateEnd, interval);
  return this.timestamps(regularTimestamps);
}
/**
 * Defines a single timestamp at which the analysis is performed.
 *
 * <p>Useful in combination with the OSMEntitySnapshotView when not performing further
 * aggregation by timestamp. When used on contributions, a warning is logged, because that view
 * needs at least two timestamps.</p>
 *
 * <p>See {@link #timestamps(OSHDBTimestampList)} for further information.</p>
 *
 * @param isoDate an ISO 8601 date string representing the date of the analysis
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer timestamps(String isoDate) {
  final boolean worksOnContributions = this.forClass.equals(OSMContribution.class);
  if (worksOnContributions) {
    LOG.warn("OSMContributionView requires two or more timestamps, but only one was supplied.");
  }
  // the same date is passed twice: duplicates are collapsed by the multi-timestamp variant
  String[] noFurtherDates = new String[] {};
  return this.timestamps(isoDate, isoDate, noFurtherDates);
}
/**
 * Defines two timestamps (start and end date) for which the analysis is performed.
 *
 * <p>Useful in combination with the OSMContributionView when not performing further aggregation
 * by timestamp.</p>
 *
 * <p>See {@link #timestamps(OSHDBTimestampList)} for further information.</p>
 *
 * @param isoDateStart an ISO 8601 date string representing the start date of the analysis
 * @param isoDateEnd an ISO 8601 date string representing the end date of the analysis
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer timestamps(String isoDateStart, String isoDateEnd) {
  String[] noFurtherDates = new String[] {};
  return this.timestamps(isoDateStart, isoDateEnd, noFurtherDates);
}
/**
 * Sets multiple arbitrary timestamps for which to perform the analysis.
 *
 * <p>Note for programmers wanting to use this method to supply an arbitrary number (n&gt;=1) of
 * timestamps: You may supply the same time string multiple times, which will be de-duplicated
 * internally. E.g. you can call the method like this:
 * <code>.timestamps(dateArr[0], dateArr[0], dateArr)</code></p>
 *
 * <p>The dates are parsed in the given order. If one of them cannot be parsed, an error
 * (including the causing exception) is logged and the remaining dates are not processed: the
 * analysis then only uses the timestamps which were parsed up to that point.</p>
 *
 * <p>See {@link #timestamps(OSHDBTimestampList)} for further information.</p>
 *
 * @param isoDateFirst an ISO 8601 date string representing the start date of the analysis
 * @param isoDateSecond an ISO 8601 date string representing the second date of the analysis
 * @param isoDateMore more ISO 8601 date strings representing the remaining timestamps of the
 *        analysis
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer timestamps(
    String isoDateFirst, String isoDateSecond, String... isoDateMore) {
  // a sorted set both orders the timestamps and removes duplicates
  SortedSet<OSHDBTimestamp> timestamps = new TreeSet<>();
  try {
    timestamps.add(
        new OSHDBTimestamp(IsoDateTimeParser.parseIsoDateTime(isoDateFirst).toEpochSecond())
    );
    timestamps.add(
        new OSHDBTimestamp(IsoDateTimeParser.parseIsoDateTime(isoDateSecond).toEpochSecond())
    );
    for (String isoDate : isoDateMore) {
      timestamps.add(
          new OSHDBTimestamp(IsoDateTimeParser.parseIsoDateTime(isoDate).toEpochSecond())
      );
    }
  } catch (Exception e) {
    // best effort: keep what was parsed so far, but don't lose the cause of the failure
    LOG.error("unable to parse ISO date string: {}", e.getMessage(), e);
  }
  return this.timestamps(() -> timestamps);
}
/**
 * Restricts the analysis to the given osm entity types.
 *
 * @param typeFilter the set of osm types to filter (e.g. `EnumSet.of(OSMType.WAY)`)
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmType(Set typeFilter) {
  // the actual work is done by the internal helper
  return this.osmTypeInternal(typeFilter);
}
/**
 * Intersects the currently set osm types with the given ones.
 *
 * @param typeFilter the set of osm types to keep
 * @return a modified copy of this mapReducer whose type filter only contains types which are
 *         present in both the previous type filter and the given set
 */
@Contract(pure = true)
private MapReducer osmTypeInternal(Set typeFilter) {
  MapReducer modified = this.copy();
  Set remainingTypes = Sets.intersection(modified.typeFilter, typeFilter);
  // an empty intersection is handled separately, via EnumSet.noneOf
  modified.typeFilter = remainingTypes.isEmpty()
      ? EnumSet.noneOf(OSMType.class)
      : EnumSet.copyOf(remainingTypes);
  return modified;
}
/**
 * Registers a custom arbitrary filter which is evaluated for every osm entity and decides
 * whether that entity takes part in this analysis.
 *
 * @param f the filter function to call for each osm entity
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmEntityFilter(SerializablePredicate f) {
  MapReducer modified = this.copy();
  modified.filters.add(f);
  return modified;
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key (regardless of its
 * value) are analyzed.
 *
 * @param key the tag key to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmTag(String key) {
  OSMTagKey tagKey = new OSMTagKey(key);
  return this.osmTag(tagKey);
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key (regardless of its
 * value), or the given tag key and value, are analyzed.
 *
 * @param tag the tag (key, or key and value) to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 * @throws UnsupportedOperationException if the given object is neither an OSMTag nor an OSMTagKey
 */
@Contract(pure = true)
public MapReducer osmTag(OSMTagInterface tag) {
  final boolean isKeyValue = tag instanceof OSMTag;
  if (!isKeyValue && !(tag instanceof OSMTagKey)) {
    throw new UnsupportedOperationException("Unknown object implementing OSMTagInterface.");
  }
  // key/value tags are checked first; everything else at this point is a key-only tag
  return isKeyValue
      ? this.osmTag((OSMTag) tag)
      : this.osmTag((OSMTagKey) tag);
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key (regardless of its
 * value) are analyzed. If the key is not present in the keytables, a warning is logged and a
 * filter which matches nothing is added instead.
 *
 * @param key the tag key to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
private MapReducer osmTag(OSMTagKey key) {
  OSHDBTagKey keyId = this.getTagTranslator().getOSHDBTagKeyOf(key);
  if (keyId.isPresentInKeytables()) {
    return osmTag(keyId);
  }
  LOG.warn("Tag key {} not found. No data will match this filter.", key.toString());
  return osmTagEmptyResult();
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key together with the
 * given value are analyzed.
 *
 * @param key the tag to filter the osm entities for
 * @param value the tag value to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmTag(String key, String value) {
  OSMTag keyValue = new OSMTag(key, value);
  return this.osmTag(keyValue);
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key and value are
 * analyzed. If the tag is not present in the keytables, a warning is logged and a filter which
 * matches nothing is added instead.
 *
 * @param tag the tag (key-value pair or key=*) to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
private MapReducer osmTag(OSMTag tag) {
  OSHDBTag keyValueId = this.getTagTranslator().getOSHDBTagOf(tag);
  if (keyValueId.isPresentInKeytables()) {
    return osmTag(keyValueId);
  }
  LOG.warn("Tag {}={} not found. No data will match this filter.",
      tag.getKey(), tag.getValue());
  return osmTagEmptyResult();
}
/**
 * Adds an osm tag filter: only osm entities which carry the given tag key together with one of
 * the given values are analyzed.
 *
 * <p>If the key is unknown or the value list is empty, a warning is logged and a filter which
 * matches nothing is added. Values which are not present in the keytables are skipped (with a
 * warning).</p>
 *
 * @param key the tag key to filter the osm entities for
 * @param values an array of tag values to filter the osm entities for
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmTag(String key, Collection values) {
  OSHDBTagKey oshdbKey = this.getTagTranslator().getOSHDBTagKeyOf(key);
  int keyId = oshdbKey.toInt();
  final boolean keyUnknown = !oshdbKey.isPresentInKeytables();
  if (keyUnknown || values.isEmpty()) {
    String warning;
    if (values.isEmpty()) {
      warning = "Empty tag value list. No data will match this filter.";
    } else {
      warning = "Tag key {} not found. No data will match this filter.";
    }
    LOG.warn(warning, key);
    return osmTagEmptyResult();
  }
  // collect the ids of all values which are known to the keytables
  Set valueIds = new HashSet<>();
  for (String value : values) {
    OSHDBTag keyValueId = this.getTagTranslator().getOSHDBTagOf(key, value);
    if (keyValueId.isPresentInKeytables()) {
      valueIds.add(keyValueId.getValue());
    } else {
      LOG.warn("Tag {}={} not found. No data will match this tag value.", key, value);
    }
  }
  MapReducer modified = this.copy();
  modified.preFilters.add(oshEntity -> oshEntity.hasTagKey(keyId));
  modified.filters.add(osmEntity -> {
    // raw tags are stored as (key, value) pairs; the scan stops as soon as a larger key is seen
    int[] rawTags = osmEntity.getRawTags();
    for (int pos = 0; pos < rawTags.length && rawTags[pos] <= keyId; pos += 2) {
      if (rawTags[pos] == keyId) {
        return valueIds.contains(rawTags[pos + 1]);
      }
    }
    return false;
  });
  return modified;
}
/**
 * Adds an osm tag filter: only osm entities which carry a tag with the given key, and whose
 * value matches the given regular expression pattern, are analyzed.
 *
 * <p>If the key is not present in the keytables, a warning is logged and a filter which matches
 * nothing is added instead.</p>
 *
 * @param key the tag key to filter the osm entities for
 * @param valuePattern a regular expression which the tag value of the osm entity must match
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer osmTag(String key, Pattern valuePattern) {
  OSHDBTagKey oshdbKey = this.getTagTranslator().getOSHDBTagKeyOf(key);
  int keyId = oshdbKey.toInt();
  final boolean keyKnown = oshdbKey.isPresentInKeytables();
  if (!keyKnown) {
    LOG.warn("Tag key {} not found. No data will match this filter.", key);
    return osmTagEmptyResult();
  }
  MapReducer modified = this.copy();
  modified.preFilters.add(oshEntity -> oshEntity.hasTagKey(keyId));
  modified.filters.add(osmEntity -> {
    // raw tags are stored as (key, value) pairs; the scan stops as soon as a larger key is seen
    int[] rawTags = osmEntity.getRawTags();
    for (int pos = 0; pos < rawTags.length && rawTags[pos] <= keyId; pos += 2) {
      if (rawTags[pos] == keyId) {
        // the pattern is applied to the string form of the value
        String value = this.getTagTranslator().getOSMTagOf(keyId, rawTags[pos + 1]).getValue();
        return valuePattern.matcher(value).matches();
      }
    }
    return false;
  });
  return modified;
}
/**
* Adds an osm tag filter: The analysis will be restricted to osm entities that have at least one
* of the supplied tags (key=value pairs or key=*).
*
* An empty tag list results in a filter which matches nothing (a warning is logged). Key=value
* tags which are not present in the keytables are skipped (with a warning).
*
* NOTE(review): unlike the other osmTag methods, key-only tags are not checked with
* {@code isPresentInKeytables()} here — confirm that this is intentional.
* NOTE(review): the generic wildcard of the parameter type looks truncated in this listing.
*
* @param tags the tags (key/value pairs or key=*) to filter the osm entities for
* @return a modified copy of this mapReducer (can be used to chain multiple commands together)
*/
@Contract(pure = true)
public MapReducer osmTag(Collection extends OSMTagInterface> tags) {
if (tags.isEmpty()) {
LOG.warn("Empty tag list. No data will match this filter.");
return osmTagEmptyResult();
}
// for the "pre"-filter which removes all entities which don't match at least one of the
// given tag keys
Set preKeyIds = new HashSet<>();
// sets of tag keys and tags for the concrete entity filter: either one of these must match
Set keyIds = new HashSet<>();
Set keyValueIds = new HashSet<>();
for (OSMTagInterface tag : tags) {
if (tag instanceof OSMTag) {
// key=value tag: only usable if it is known to the keytables
OSMTag keyValue = (OSMTag) tag;
OSHDBTag keyValueId = this.getTagTranslator().getOSHDBTagOf(keyValue);
if (!keyValueId.isPresentInKeytables()) {
LOG.warn("Tag {}={} not found. No data will match this tag value.",
keyValue.getKey(), keyValue.getValue());
} else {
preKeyIds.add(keyValueId.getKey());
keyValueIds.add(keyValueId);
}
} else {
// everything else is treated as a key-only tag (key=*)
OSHDBTagKey keyId = this.getTagTranslator().getOSHDBTagKeyOf((OSMTagKey) tag);
preKeyIds.add(keyId.toInt());
keyIds.add(keyId.toInt());
}
}
MapReducer ret = this.copy();
// pre-filter: keep entities which have at least one of the collected tag keys
ret.preFilters.add(oshEntity -> {
for (int key : oshEntity.getRawTagKeys()) {
if (preKeyIds.contains(key)) {
return true;
}
}
return false;
});
// filter: keep entities which have one of the keys or one of the exact key=value tags
ret.filters.add(osmEntity -> {
for (OSHDBTag oshdbTag : osmEntity.getTags()) {
if (keyIds.contains(oshdbTag.getKey()) || keyValueIds.contains(oshdbTag)) {
return true;
}
}
return false;
});
return ret;
}
/**
 * Adds a filter for an already translated tag (key and value ids).
 *
 * @param tag the tag (in its internal id representation) the osm entities must have
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
private MapReducer osmTag(OSHDBTag tag) {
  MapReducer modified = this.copy();
  // pre-filter on the key only, exact key/value check in the entity filter
  modified.preFilters.add(oshEntity -> {
    return oshEntity.hasTagKey(tag.getKey());
  });
  modified.filters.add(osmEntity -> {
    return osmEntity.hasTagValue(tag.getKey(), tag.getValue());
  });
  return modified;
}
/**
 * Adds a filter for an already translated tag key.
 *
 * @param tagKey the tag key (in its internal id representation) the osm entities must have
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
private MapReducer osmTag(OSHDBTagKey tagKey) {
  MapReducer modified = this.copy();
  // the same key check is registered as pre-filter and as entity filter
  modified.preFilters.add(oshEntity -> {
    return oshEntity.hasTagKey(tagKey);
  });
  modified.filters.add(osmEntity -> {
    return osmEntity.hasTagKey(tagKey);
  });
  return modified;
}
/**
 * Adds filters which reject everything. Used by the osmTag methods when the requested tag
 * cannot match any data.
 *
 * @return a modified copy of this mapReducer whose added (pre-)filters always return false
 */
@Contract(pure = true)
private MapReducer osmTagEmptyResult() {
  MapReducer modified = this.copy();
  modified.preFilters.add(ignored -> {
    return false;
  });
  modified.filters.add(ignored -> {
    return false;
  });
  return modified;
}
// -----------------------------------------------------------------------------------------------
// "map", "flatMap" transformation methods
// -----------------------------------------------------------------------------------------------
/**
* Set an arbitrary `map` transformation function.
*
* The function is appended to the chain of mappers of a copy of this object; the object this
* method is called on is not modified.
*
* NOTE(review): the generic type arguments of this method look truncated in this listing.
*
* @param mapper function that will be applied to each data entry (osm entity snapshot or
* contribution)
* @param <R> an arbitrary data type which is the return type of the transformation `map` function
* @return a modified copy of this MapReducer object operating on the transformed type (&lt;R&gt;)
*/
@Contract(pure = true)
public MapReducer map(SerializableFunction mapper) {
MapReducer> ret = this.copy();
// the second argument (false) marks this as a plain map function, as opposed to a flatMap
ret.mappers.add(new MapFunction(mapper, false));
@SuppressWarnings("unchecked") // after applying this mapper, we have a mapreducer of type R
MapReducer result = (MapReducer) ret;
return result;
}
/**
* Set an arbitrary `flatMap` transformation function, which returns list with an arbitrary number
* of results per input data entry. The results of this function will be "flattened", meaning that
* they can be for example transformed again by setting additional `map` functions.
*
* The function is appended to the chain of mappers of a copy of this object; the object this
* method is called on is not modified.
*
* NOTE(review): the generic type arguments of this method look truncated in this listing.
*
* @param flatMapper function that will be applied to each data entry (osm entity snapshot or
* contribution) and returns a list of results
* @param <R> an arbitrary data type which is the return type of the transformation `map` function
* @return a modified copy of this MapReducer object operating on the transformed type (&lt;R&gt;)
*/
@Contract(pure = true)
public MapReducer flatMap(SerializableFunction> flatMapper) {
MapReducer> ret = this.copy();
// the second argument (true) marks this as a flatMap function, as opposed to a plain map
ret.mappers.add(new MapFunction(flatMapper, true));
@SuppressWarnings("unchecked") // after applying this mapper, we have a mapreducer of type R
MapReducer result = (MapReducer) ret;
return result;
}
/**
 * Adds a custom arbitrary filter as a step of the current transformation chain. Internally,
 * this is a `flatMap` step which yields either the unchanged element or nothing.
 *
 * @param f the filter function that determines if the respective data should be passed on (when
 *        f returns true) or discarded (when f returns false)
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer filter(SerializablePredicate f) {
  return this.flatMap(data -> {
    if (f.test(data)) {
      return Collections.singletonList(data);
    }
    return Collections.emptyList();
  });
}
/**
* Apply a custom "ohsome" filter expression to this query.
*
* The expression is registered as a pre-filter (`applyOSH`) and as an entity filter (`applyOSM`).
* In addition, its geometry test (`applyOSMGeometry`) is inserted as the first step of the
* transformation chain, in front of all mappers which are already set. For contributions, the
* geometry test uses the state after the contribution for creations, the state before it for
* deletions, and accepts either of the two states otherwise. When the data is grouped by entity,
* every list entry is tested individually and lists which end up empty are dropped.
*
* See https://gitlab.gistools.geog.uni-heidelberg.de/giscience/big-data/ohsome/libs/ohsome-filter#ohsome-filter
* and https://docs.ohsome.org/java/ohsome-filter
* for further information about how to create such a filter expression object.
*
* @param f the filter expression to apply to the mapReducer
* @return a modified copy of this mapReducer (can be used to chain multiple commands together)
* @throws UnsupportedOperationException if the current grouping mode is not handled here
*/
@Contract(pure = true)
public MapReducer filter(FilterExpression f) {
MapReducer ret = this.copy();
ret.preFilters.add(f::applyOSH);
ret.filters.add(f::applyOSM);
// apply geometry filter as first map function
// (the already present mappers are set aside here and appended again at the end)
final List remainingMappers = List.copyOf(ret.mappers);
ret.mappers.clear();
if (this.grouping == Grouping.NONE) {
// no grouping -> directly filter using the geometries of the snapshot / contribution
if (ret.forClass.equals(OSMEntitySnapshot.class)) {
ret = ret.filter(x -> {
OSMEntitySnapshot s = (OSMEntitySnapshot) x;
return f.applyOSMGeometry(s.getEntity(), s::getGeometry);
});
} else if (ret.forClass.equals(OSMContribution.class)) {
ret = ret.filter(x -> {
OSMContribution c = (OSMContribution) x;
if (c.is(ContributionType.CREATION)) {
// creation: only the state after the contribution is tested
return f.applyOSMGeometry(c.getEntityAfter(), c::getGeometryAfter);
} else if (c.is(ContributionType.DELETION)) {
// deletion: only the state before the contribution is tested
return f.applyOSMGeometry(c.getEntityBefore(), c::getGeometryBefore);
} else {
// otherwise: it is sufficient if either the state before or after matches
return f.applyOSMGeometry(c.getEntityBefore(), c::getGeometryBefore)
|| f.applyOSMGeometry(c.getEntityAfter(), c::getGeometryAfter);
}
});
}
} else if (this.grouping == Grouping.BY_ID) {
// grouping by entity -> filter each list entry individually
if (ret.forClass.equals(OSMEntitySnapshot.class)) {
@SuppressWarnings("unchecked") MapReducer filteredListMapper = (MapReducer)
ret.map(x -> (Collection) x)
.map(snapshots -> snapshots.stream()
.filter(s -> f.applyOSMGeometry(s.getEntity(), s::getGeometry))
.collect(Collectors.toCollection(ArrayList::new)))
.filter(snapshots -> !snapshots.isEmpty());
ret = filteredListMapper;
} else if (ret.forClass.equals(OSMContribution.class)) {
@SuppressWarnings("unchecked") MapReducer filteredListMapper = (MapReducer)
ret.map(x -> (Collection) x)
.map(contributions -> contributions.stream()
.filter(c -> {
// same rules as in the ungrouped contribution case above
if (c.is(ContributionType.CREATION)) {
return f.applyOSMGeometry(c.getEntityAfter(), c::getGeometryAfter);
} else if (c.is(ContributionType.DELETION)) {
return f.applyOSMGeometry(c.getEntityBefore(), c::getGeometryBefore);
} else {
return f.applyOSMGeometry(c.getEntityBefore(), c::getGeometryBefore)
|| f.applyOSMGeometry(c.getEntityAfter(), c::getGeometryAfter);
}
})
.collect(Collectors.toCollection(ArrayList::new)))
.filter(contributions -> !contributions.isEmpty());
ret = filteredListMapper;
}
} else {
throw new UnsupportedOperationException(
"filtering not implemented in grouping mode " + this.grouping.toString());
}
// re-attach the previously set mappers behind the geometry filter step
ret.mappers.addAll(remainingMappers);
// NOTE(review): optimizeFilters is defined outside of this part of the file
return optimizeFilters(ret, f);
}
/**
 * Applies a custom "ohsome" filter, given in its textual form, to this query. The string is
 * parsed into a filter expression, which is then applied via {@link #filter(FilterExpression)}.
 *
 * <p>See https://gitlab.gistools.geog.uni-heidelberg.de/giscience/big-data/ohsome/libs/ohsome-filter#syntax
 * for a description of the ohsome filter syntax.</p>
 *
 * @param f the ohsome filter string to apply to the mapReducer
 * @return a modified copy of this mapReducer (can be used to chain multiple commands together)
 */
@Contract(pure = true)
public MapReducer filter(String f) {
  FilterParser parser = new FilterParser(this.getTagTranslator());
  FilterExpression expression = parser.parse(f);
  return this.filter(expression);
}
// -----------------------------------------------------------------------------------------------
// Grouping and Aggregation
// Sets how the input data is "grouped", or the output data is "aggregated" into separate chunks.
// -----------------------------------------------------------------------------------------------
/**
* Groups the input data (osm entity snapshot or contributions) by their respective entity's ids
* before feeding them into further transformation functions. This can be used to do more complex
* analysis on the osm data, that requires one to know about the full editing history of
* individual osm entities, e.g., when looking for contributions which got reverted at a later
* point in time.
*
* The values in the returned lists of snapshot or contribution objects are returned in their
* natural order: i.e. sorted ascending by timestamp.
*
* This needs to be called before any `map` or `flatMap` transformation functions have been
* set. Otherwise a runtime exception will be thrown.
*
* NOTE(review): the generic type arguments of this method look truncated in this listing.
*
* @return the MapReducer object which applies its transformations on (by entity id grouped) lists
* of the input data
* @throws UnsupportedOperationException if this is called after some map (or flatMap) functions
* have already been set
* @throws UnsupportedOperationException if this is called when a grouping has already been
* activated
*/
@Contract(pure = true)
public MapReducer> groupByEntity() throws UnsupportedOperationException {
// grouping is only possible while the transformation chain is still empty
if (!this.mappers.isEmpty()) {
throw new UnsupportedOperationException(
"groupByEntity() must be called before any `map` or `flatMap` "
+ "transformation functions have been set"
);
}
// only one grouping can be active at a time
if (this.grouping != Grouping.NONE) {
throw new UnsupportedOperationException("A grouping is already active on this MapReducer");
}
MapReducer ret = this.copy();
ret.grouping = Grouping.BY_ID;
@SuppressWarnings("unchecked") // now in the reduce step the backend will return a list of items
MapReducer> result = (MapReducer>) ret;
return result;
}
/**
* Sets a custom aggregation function that is used to group output results into.
*
* The values returned by the indexer have to be of a comparable type.
*
* NOTE(review): the name of the type parameter which describes the indexer's return type is
* missing here, and the generic declaration of this method looks truncated in this copy of the
* file -- confirm both against the original source.
*
* @param indexer a function that will be called for each input element and returns a value that
* will be used to group the results by
* @param zerofill a collection of values that are expected to be present in the result
* @return a new MapAggregator object, created from this MapReducer object (and thereby carrying
* over its settings, filters, map function, etc.), the given indexer and the zerofill values
*/
@Contract(pure = true)
public & Serializable> MapAggregator aggregateBy(
SerializableFunction indexer,
Collection zerofill
) {
// the MapAggregator is constructed directly from this mapReducer, the indexer and the zerofill
return new MapAggregator<>(this, indexer, zerofill);
}
/**
* Sets a custom aggregation function that is used to group output results into.
*
* This is a shorthand for calling the two-argument variant of `aggregateBy` with an empty
* collection of zerofill values.
*
* The values returned by the indexer have to be of a comparable type.
*
* NOTE(review): the name of the type parameter which describes the indexer's return type is
* missing here, and the generic declaration of this method looks truncated in this copy of the
* file -- confirm both against the original source.
*
* @param indexer a function that will be called for each input element and returns a value that
* will be used to group the results by
* @return a MapAggregator object with the equivalent state (settings, filters, map function,
* etc.) of the current MapReducer object
*/
@Contract(pure = true)
public & Serializable> MapAggregator aggregateBy(
SerializableFunction indexer
) {
// no zerofill values requested: delegate with an empty list
return this.aggregateBy(indexer, Collections.emptyList());
}
/**
 * Aggregates the results of this query automatically by timestamp.
 *
 * Which timestamp serves as the aggregation index depends on the kind of data being processed:
 * for OSMEntitySnapshot objects the snapshot's own timestamp is used directly; for
 * OSMContribution objects the contribution's timestamp is matched to the greatest timestamp of
 * this query's `timestamps` setting that is not later than it.
 *
 * For convenience, this may also be called after some map or flatMap functions were set: these
 * are then "rewound", the timestamp index is applied to the raw input data, and the functions
 * are re-applied (in their original order) on the resulting MapAggregator.
 *
 * Cannot be used together with the `groupByEntity()` setting enabled.
 *
 * @return a MapAggregator object with the equivalent state (settings, filters, map function,
 *         etc.) of the current MapReducer object
 * @throws UnsupportedOperationException if this is called when the `groupByEntity()` mode has
 *         been activated, or if the processed data is neither of type OSMContribution nor of
 *         type OSMEntitySnapshot
 */
@Contract(pure = true)
public MapAggregator aggregateByTimestamp()
    throws UnsupportedOperationException {
  if (this.grouping != Grouping.NONE) {
    throw new UnsupportedOperationException(
        "automatic aggregateByTimestamp() cannot be used together with the groupByEntity() "
        + "functionality -> try using aggregateByTimestamp(customTimestampIndex) instead"
    );
  }

  // pick the function that maps an input object to the timestamp it is aggregated by
  SerializableFunction indexer;
  if (this.forClass.equals(OSMEntitySnapshot.class)) {
    // snapshots already carry exactly one of the requested timestamps
    indexer = data -> ((OSMEntitySnapshot) data).getTimestamp();
  } else if (this.forClass.equals(OSMContribution.class)) {
    // contributions happen at arbitrary times: match them to the requested timestamps
    final TreeSet timestamps = new TreeSet<>(this.tstamps.get());
    indexer = data -> timestamps.floor(((OSMContribution) data).getTimestamp());
  } else {
    throw new UnsupportedOperationException(
        "automatic aggregateByTimestamp() only implemented for OSMContribution and "
        + "OSMEntitySnapshot -> try using aggregateByTimestamp(customTimestampIndex) instead"
    );
  }

  if (this.mappers.isEmpty()) {
    // nothing to rewind: the indexer can be applied to the input data as is
    return new MapAggregator<>(this, indexer, this.getZerofillTimestamps());
  }

  // some map / flatMap functions were already set: take them off a copy of this mapReducer,
  // apply the indexer to the raw input and put the functions back on afterwards
  MapReducer rewound = this.copy();
  List pendingMappers = new LinkedList<>(rewound.mappers);
  rewound.mappers.clear();
  MapAggregator aggregator =
      new MapAggregator<>(rewound, indexer, this.getZerofillTimestamps());
  for (MapFunction mapper : pendingMappers) {
    @SuppressWarnings("unchecked") // applying untyped function (we don't know interm. types)
    MapAggregator next = mapper.isFlatMapper()
        ? aggregator.flatMap(mapper)
        : aggregator.map(mapper);
    aggregator = next;
  }
  @SuppressWarnings("unchecked") // after applying all (flat)map functions the final type is X
  MapAggregator result = (MapAggregator) aggregator;
  return result;
}
/**
 * Sets up aggregation by a custom time index.
 *
 * The timestamps returned by the supplied indexing function are matched to the corresponding
 * time intervals: each returned timestamp is assigned to the greatest timestamp of this query's
 * `timestamps` setting that is not later than it.
 *
 * @param indexer a callback function that returns a timestamp object for each given data. Note
 *        that this function must neither return `null` nor a timestamp that lies before the
 *        first or after the last of the supplied timestamps(): in these cases an
 *        OSHDBInvalidTimestampException is thrown at the time the respective data is processed
 * @return a MapAggregator object with the equivalent state (settings,
 *         filters, map function, etc.) of the current MapReducer object
 */
@Contract(pure = true) // consistent with the other aggregateBy* methods of this class
public MapAggregator aggregateByTimestamp(
    SerializableFunction indexer
) throws UnsupportedOperationException {
  final TreeSet timestamps = new TreeSet<>(this.tstamps.get());
  // the valid range of aggregation timestamps is bounded by the first and last query timestamp
  final OSHDBTimestamp minTime = timestamps.first();
  final OSHDBTimestamp maxTime = timestamps.last();
  return new MapAggregator(this, data -> {
    // match timestamps to the given timestamp list
    OSHDBTimestamp aggregationTimestamp = indexer.apply(data);
    if (aggregationTimestamp == null
        || aggregationTimestamp.compareTo(minTime) < 0
        || aggregationTimestamp.compareTo(maxTime) > 0) {
      throw new OSHDBInvalidTimestampException(
          "Aggregation timestamp outside of time query interval."
      );
    }
    // floor() yields the greatest query timestamp that is <= the aggregation timestamp
    return timestamps.floor(aggregationTimestamp);
  }, getZerofillTimestamps());
}
/**
* Sets up automatic aggregation by geometries.
*
* Every input object (OSMContribution or OSMEntitySnapshot) is passed through a GeometrySplitter
* that is built from the given geometries. The entries of the map returned by the splitter are
* aggregated by their key, while their value is what gets handed on to the further processing
* steps. The keys of the `geometries` map are used as zerofill values of the aggregation.
*
* Cannot be used together with the `groupByEntity()` setting enabled.
*
* NOTE(review): the generic declaration of this method looks truncated in this copy of the
* file (the name of the first type parameter is missing) -- confirm against the original source.
*
* @param geometries the geometries to aggregate by, together with the keys that are used to
* identify them in the result
* @return a MapAggregator object with the equivalent state (settings, filters, map function,
* etc.) of the current MapReducer object
* @throws UnsupportedOperationException if this is called when the `groupByEntity()` mode has
* been activated
* @throws UnsupportedOperationException when called after any map or flatMap functions are set
* @throws UnsupportedOperationException if the processed data is neither of type OSMContribution
* nor of type OSMEntitySnapshot
*/
@Contract(pure = true)
public & Serializable, P extends Geometry & Polygonal>
MapAggregator aggregateByGeometry(Map geometries) throws
UnsupportedOperationException {
if (this.grouping != Grouping.NONE) {
throw new UnsupportedOperationException(
"aggregateByGeometry() cannot be used together with the groupByEntity() functionality"
);
}
// NOTE(review): the splitter is constructed before the mappers precondition below is checked,
// i.e. also in the case where this method is going to throw anyway -- consider checking first
GeometrySplitter gs = new GeometrySplitter<>(geometries);
if (this.mappers.size() > 0) {
throw new UnsupportedOperationException(
"please call aggregateByGeometry before setting any map or flatMap functions"
);
} else {
MapAggregator ret;
// split each input object by the geometries, aggregate the resulting entries by their key
// (zerofilled with all keys of the input map) and continue with the entries' values
if (this.forClass.equals(OSMContribution.class)) {
ret = this.flatMap(x -> gs.splitOSMContribution((OSMContribution) x).entrySet())
.aggregateBy(Entry::getKey, geometries.keySet()).map(Entry::getValue);
} else if (this.forClass.equals(OSMEntitySnapshot.class)) {
ret = this.flatMap(x -> gs.splitOSMEntitySnapshot((OSMEntitySnapshot) x).entrySet())
.aggregateBy(Entry::getKey, geometries.keySet()).map(Entry::getValue);
} else {
throw new UnsupportedOperationException(
"aggregateByGeometry not implemented for objects of type: " + this.forClass.toString()
);
}
@SuppressWarnings("unchecked") // no mapper functions have been applied so the type is still X
MapAggregator result = (MapAggregator) ret;
return result;
}
}
// -----------------------------------------------------------------------------------------------
// Exposed generic reduce.
// Can be used by experienced users of the api to implement complex queries.
// These offer full flexibility, but are potentially a bit tricky to work with (see javadoc).
// -----------------------------------------------------------------------------------------------
/**
* Generic map-reduce routine.
*
*
* The combination of the used types and identity/reducer functions must make "mathematical"
* sense:
*
*
* - the accumulator and combiner functions need to be associative,
* - values generated by the identitySupplier factory must be an identity for the combiner
* function: `combiner(identitySupplier(),x)` must be equal to `x`,
* - the combiner function must be compatible with the accumulator function: `combiner(u,
* accumulator(identitySupplier(), t)) == accumulator.apply(u, t)`
*
*
*
* Functionally, this interface is similar to Java11 Stream's
* reduce(identity,accumulator,combiner)
* interface.
*
*
* @param identitySupplier a factory function that returns a new starting value to reduce results
* into (e.g. when summing values, one needs to start at zero)
* @param accumulator a function that takes a result from the `mapper` function (type <R>)
* and an accumulation value (type <S>, e.g. the result of `identitySupplier()`) and
* returns the "sum" of the two; contrary to `combiner`, this function is allowed to alter
* (mutate) the state of the accumulation value (e.g. directly adding new values to an
* existing Set object)
* @param combiner a function that calculates the "sum" of two <S> values; this function
* must be pure (have no side effects), and is not allowed to alter the state of the two
* input objects it gets!
* @param <S> the data type used to contain the "reduced" (intermediate and final) results
* @return the result of the map-reduce operation, the final result of the last call to the
* `combiner` function, after all `mapper` results have been aggregated (in the
* `accumulator` and `combiner` steps)
* @throws UnsupportedOperationException if the used oshdb database backend doesn't implement
* the required reduce operation.
* @throws Exception if during the reducing operation an exception happens (see the respective
* implementations for details).
*/
@Contract(pure = true)
public S reduce(
SerializableSupplier identitySupplier,
SerializableBiFunction accumulator,
SerializableBinaryOperator combiner)
throws Exception {
checkTimeout();
switch (this.grouping) {
case NONE:
if (this.mappers.stream().noneMatch(MapFunction::isFlatMapper)) {
final SerializableFunction