package com.onthegomap.planetiler;
import com.onthegomap.planetiler.archive.TileArchiveConfig;
import com.onthegomap.planetiler.archive.TileArchiveMetadata;
import com.onthegomap.planetiler.archive.TileArchiveWriter;
import com.onthegomap.planetiler.archive.TileArchives;
import com.onthegomap.planetiler.archive.WriteableTileArchive;
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.collection.LongLongMap;
import com.onthegomap.planetiler.collection.LongLongMultimap;
import com.onthegomap.planetiler.config.Arguments;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.reader.GeoPackageReader;
import com.onthegomap.planetiler.reader.NaturalEarthReader;
import com.onthegomap.planetiler.reader.ShapefileReader;
import com.onthegomap.planetiler.reader.SourceFeature;
import com.onthegomap.planetiler.reader.osm.OsmInputFile;
import com.onthegomap.planetiler.reader.osm.OsmNodeBoundsProvider;
import com.onthegomap.planetiler.reader.osm.OsmReader;
import com.onthegomap.planetiler.reader.parquet.ParquetReader;
import com.onthegomap.planetiler.stats.ProcessInfo;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.stats.Timers;
import com.onthegomap.planetiler.util.AnsiColors;
import com.onthegomap.planetiler.util.BuildInfo;
import com.onthegomap.planetiler.util.ByteBufferUtil;
import com.onthegomap.planetiler.util.Downloader;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Format;
import com.onthegomap.planetiler.util.Geofabrik;
import com.onthegomap.planetiler.util.LogUtil;
import com.onthegomap.planetiler.util.ResourceUsage;
import com.onthegomap.planetiler.util.TileSizeStats;
import com.onthegomap.planetiler.util.TopOsmTiles;
import com.onthegomap.planetiler.util.Translations;
import com.onthegomap.planetiler.util.Wikidata;
import com.onthegomap.planetiler.validator.JavaProfileValidator;
import com.onthegomap.planetiler.worker.RunnableThatThrows;
import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* High-level API for creating a new map that ties together lower-level utilities in a way that is suitable for the
* most common use-cases.
* <p>
* For example:
*
* <pre>{@code
* public static void main(String[] args) {
*   Planetiler.create(arguments)
*     .setProfile(new CustomProfile())
*     .addShapefileSource("shapefile", Path.of("shapefile.zip"))
*     .addNaturalEarthSource("natural_earth", Path.of("natural_earth.zip"))
*     .addOsmSource("osm", Path.of("source.osm.pbf"))
*     .setOutput("mbtiles", Path.of("output.mbtiles"))
*     .run();
* }
* }</pre>
* <p>
* Each call to a builder API mutates the runner instance and returns it for more chaining.
* <p>
* See {@code ToiletsOverlayLowLevelApi} or unit tests for examples using the low-level API.
*/
@SuppressWarnings("UnusedReturnValue")
public class Planetiler {
private static final Logger LOGGER = LoggerFactory.getLogger(Planetiler.class);
private final List<Stage> stages = new ArrayList<>();
private final List<ToDownload> toDownload = new ArrayList<>();
private final List<InputPath> inputPaths = new ArrayList<>();
private final Timers.Finishable overallTimer;
private final Arguments arguments;
private final Stats stats;
private final Path tmpDir;
private final Path nodeDbPath;
private final Path multipolygonPath;
private final Path featureDbPath;
private final Path onlyRunTests;
private boolean downloadSources;
private final boolean refreshSources;
private final boolean onlyDownloadSources;
private final boolean parseNodeBounds;
private Profile profile = null;
private Function<Planetiler, Profile> profileProvider = null;
private final PlanetilerConfig config;
private FeatureGroup featureGroup;
private OsmInputFile osmInputFile;
private TileArchiveConfig output;
private boolean overwrite = false;
private boolean ran = false;
// most common OSM languages
private List<String> languages = List.of(
"en", "ru", "ar", "zh", "ja", "ko", "fr",
"de", "fi", "pl", "es", "be", "br", "he"
);
private Translations translations;
private Path wikidataNamesFile;
private boolean useWikidata = false;
private boolean onlyFetchWikidata = false;
private boolean fetchWikidata = false;
private Duration wikidataMaxAge = Duration.ZERO;
private int wikidataUpdateLimit = 0;
private final boolean fetchOsmTileStats;
private TileArchiveMetadata tileArchiveMetadata;
private Planetiler(Arguments arguments) {
this.arguments = arguments;
stats = arguments.getStats();
overallTimer = stats.startStageQuietly("overall");
config = PlanetilerConfig.from(arguments);
if (config.color() != null) {
AnsiColors.setUseColors(config.color());
}
tmpDir = config.tmpDir();
onlyDownloadSources = arguments.getBoolean("only_download", "download source data then exit", false);
onlyRunTests = arguments.file("tests", "run test cases in a yaml then quit", null);
downloadSources = onlyDownloadSources || arguments.getBoolean("download", "download sources", false);
refreshSources =
arguments.getBoolean("refresh_sources", "download new version of source files if they have changed", false);
fetchOsmTileStats =
arguments.getBoolean("download_osm_tile_weights", "download OSM tile weights file", downloadSources);
nodeDbPath = arguments.file("temp_nodes", "temp node db location", tmpDir.resolve("node.db"));
multipolygonPath =
arguments.file("temp_multipolygons", "temp multipolygon db location", tmpDir.resolve("multipolygon.db"));
featureDbPath = arguments.file("temp_features", "temp feature db location", tmpDir.resolve("feature.db"));
parseNodeBounds =
arguments.getBoolean("osm_parse_node_bounds", "parse bounds from OSM nodes instead of header", false);
}
/** Returns a new empty runner that will get configuration from {@code arguments}. */
public static Planetiler create(Arguments arguments) {
return new Planetiler(arguments);
}
/**
* Adds a new {@code .osm.pbf} source that will be processed when {@link #run()} is called.
*
* To override the location of the {@code .osm.pbf} file, set {@code name_path=newpath.osm.pbf} in the arguments.
*
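* For example (illustrative only; the source name and path below are placeholders):
*
* <pre>{@code
* // register an OSM source named "osm"; setting osm_path=other.osm.pbf in the arguments overrides the default path
* Planetiler.create(arguments)
*   .addOsmSource("osm", Path.of("data", "sources", "input.osm.pbf"));
* }</pre>
*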
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} argument is not set
* @return this runner instance for chaining
* @see OsmInputFile
* @see OsmReader
*/
public Planetiler addOsmSource(String name, Path defaultPath) {
return addOsmSource(name, defaultPath, null);
}
/**
* Adds a new {@code .osm.pbf} source that will be processed when {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code .osm.pbf} file, set {@code name_path=newpath.osm.pbf} in the arguments and
* to override the download URL set {@code name_url=http://url/of/osm.pbf}.
*
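* For example (illustrative only; the source name and extract below are placeholders):
*
* <pre>{@code
* // downloads the Monaco extract from Geofabrik first if download=true is set and the file is missing
* Planetiler.create(arguments)
*   .addOsmSource("osm", Path.of("data", "sources", "monaco.osm.pbf"), "geofabrik:monaco");
* }</pre>
*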
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} argument is not set
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set. As a shortcut, can use "geofabrik:monaco" or
* "geofabrik:australia" shorthand to find an extract by name from the Geofabrik download
* site, or "aws:latest" to download the latest {@code planet.osm.pbf} file from the AWS
* Open Data Registry.
* @return this runner instance for chaining
* @see OsmInputFile
* @see OsmReader
* @see Downloader
* @see Geofabrik
*/
public Planetiler addOsmSource(String name, Path defaultPath, String defaultUrl) {
if (osmInputFile != null) {
// TODO: support more than one input OSM file
throw new IllegalArgumentException("Currently only one OSM input file is supported");
}
Path path = getPath(name, "OSM input file", defaultPath, defaultUrl);
var thisInputFile = new OsmInputFile(path, config.osmLazyReads());
osmInputFile = thisInputFile;
// fail fast if there is some issue with madvise on this system
if (config.nodeMapMadvise() || config.multipolygonGeometryMadvise()) {
ByteBufferUtil.init();
}
return appendStage(new Stage(
name,
List.of(
name + "_pass1: Pre-process OpenStreetMap input (store node locations then relation members)",
name + "_pass2: Process OpenStreetMap nodes, ways, then relations"
),
ifSourceUsed(name, () -> {
var header = osmInputFile.getHeader();
tileArchiveMetadata.setExtraMetadata("planetiler:" + name + ":osmosisreplicationtime", header.instant());
tileArchiveMetadata.setExtraMetadata("planetiler:" + name + ":osmosisreplicationseq",
header.osmosisReplicationSequenceNumber());
tileArchiveMetadata.setExtraMetadata("planetiler:" + name + ":osmosisreplicationurl",
header.osmosisReplicationBaseUrl());
try (
var nodeLocations =
LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath, config.nodeMapMadvise());
var multipolygonGeometries = LongLongMultimap.newReplaceableMultimap(
config.multipolygonGeometryStorage(), multipolygonPath, config.multipolygonGeometryMadvise());
var osmReader = new OsmReader(name, thisInputFile, nodeLocations, multipolygonGeometries, profile(), stats)
) {
osmReader.pass1(config);
osmReader.pass2(featureGroup, config);
} finally {
FileUtils.delete(nodeDbPath);
FileUtils.delete(multipolygonPath);
}
}))
);
}
/**
* Adds a new ESRI shapefile source that will be processed using a projection inferred from the shapefile when
* {@link #run()} is called.
*
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileSource(String name, Path defaultPath) {
return addShapefileSource(null, name, defaultPath);
}
/**
* Adds a new ESRI shapefile source that will be processed using an explicit projection when {@link #run()} is called.
*
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileSource(String projection, String name, Path defaultPath) {
return addShapefileSource(projection, name, defaultPath, null);
}
/**
* Adds a new ESRI shapefile source that will be processed with a projection inferred from the shapefile when
* {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments and
* to override the download URL set {@code name_url=http://url/of/shapefile.zip}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
* @see Downloader
*/
public Planetiler addShapefileSource(String name, Path defaultPath, String defaultUrl) {
return addShapefileSource(null, name, defaultPath, defaultUrl);
}
/**
* Adds a new ESRI shapefile glob source that will process all files under {@code basePath} matching
* {@code globPattern}. {@code basePath} may be a directory or ZIP archive.
*
* @param sourceName string to use in stats and logs to identify this stage
* @param basePath path to the directory containing shapefiles to process
* @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileGlobSource(String sourceName, Path basePath, String globPattern) {
return addShapefileGlobSource(null, sourceName, basePath, globPattern, null);
}
/**
* Adds a new ESRI shapefile glob source that will process all files under {@code basePath} matching
* {@code globPattern} using an explicit projection. {@code basePath} may be a directory or ZIP archive.
*
* If {@code globPattern} matches a ZIP archive, all files ending in {@code .shp} within the archive will be used for
* this source.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
*
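* For example (illustrative only; the source name, archive, and glob below are placeholders):
*
* <pre>{@code
* // process every .shp file inside the downloaded archive
* Planetiler.create(arguments)
*   .addShapefileGlobSource("EPSG:4326", "shapes", Path.of("data", "sources", "shapes.zip"), "*.shp",
*     "https://example.com/shapes.zip");
* }</pre>
*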
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param sourceName string to use in stats and logs to identify this stage
* @param basePath path to the directory or zip file containing shapefiles to process
* @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileGlobSource(String projection, String sourceName, Path basePath,
String globPattern, String defaultUrl) {
Path dirPath = getPath(sourceName, "shapefile glob", basePath, defaultUrl);
return addStage(sourceName, "Process all files matching " + dirPath + "/" + globPattern,
ifSourceUsed(sourceName, () -> {
var sourcePaths = FileUtils.walkPathWithPattern(basePath, globPattern,
zipPath -> FileUtils.walkPathWithPattern(zipPath, "*.shp"));
ShapefileReader.processWithProjection(projection, sourceName, sourcePaths, featureGroup, config,
profile, stats);
}));
}
/**
* Adds a new ESRI shapefile source that will be processed with an explicit projection when {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments and
* to override the download URL set {@code name_url=http://url/of/shapefile.zip}.
*
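* For example (illustrative only; the source name and URL below are placeholders):
*
* <pre>{@code
* Planetiler.create(arguments)
*   .addShapefileSource("EPSG:3857", "water", Path.of("data", "sources", "water.zip"),
*     "https://example.com/water.zip");
* }</pre>
*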
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
* @see Downloader
*/
public Planetiler addShapefileSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "shapefile", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
ifSourceUsed(name, () -> {
List<Path> sourcePaths = List.of(path);
if (FileUtils.hasExtension(path, "zip") || Files.isDirectory(path)) {
sourcePaths = FileUtils.walkPathWithPattern(path, "*.shp");
}
ShapefileReader.processWithProjection(projection, name, sourcePaths, featureGroup, config, profile, stats);
}));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
*
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
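* For example (illustrative only; the source name and URL below are placeholders):
*
* <pre>{@code
* Planetiler.create(arguments)
*   .addGeoPackageSource("EPSG:4326", "parks", Path.of("data", "sources", "parks.gpkg"),
*     "https://example.com/parks.gpkg");
* }</pre>
*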
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
boolean keepUnzipped = getKeepUnzipped(name);
return addStage(name, "Process features in " + path,
ifSourceUsed(name, () -> {
List<Path> sourcePaths = List.of(path);
if (FileUtils.hasExtension(path, "zip")) {
sourcePaths = FileUtils.walkPathWithPattern(path, "*.gpkg");
}
if (sourcePaths.isEmpty()) {
throw new IllegalArgumentException("No .gpkg files found in " + path);
}
GeoPackageReader.process(projection, name, sourcePaths,
keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup, config, profile,
stats, keepUnzipped);
}));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
*
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
return addGeoPackageSource(null, name, defaultPath, defaultUrl);
}
/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
*
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @return this runner instance for chaining
* @see NaturalEarthReader
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
return addNaturalEarthSource(name, defaultPath, null);
}
/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
*
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
*
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
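* Since this method is deprecated, a sketch of the suggested replacement (the GeoPackage path and URL below are
* placeholders) is:
*
* <pre>{@code
* // read the GeoPackage distribution of Natural Earth instead of the sqlite one
* Planetiler.create(arguments)
*   .addGeoPackageSource("natural_earth", Path.of("data", "sources", "natural_earth.gpkg.zip"),
*     "https://example.com/natural_earth_vector.gpkg.zip");
* }</pre>
*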
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see NaturalEarthReader
* @see Downloader
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
boolean keepUnzipped = getKeepUnzipped(name);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader
.process(name, path, keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup,
config, profile, stats, keepUnzipped)));
}
/**
* Adds a new geoparquet source that will be processed when
* {@link #run()} is called.
*
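* For example (illustrative only; the paths and property names below are placeholders):
*
* <pre>{@code
* Planetiler.create(arguments)
*   .addParquetSource("buildings", List.of(Path.of("data", "buildings.parquet")), true,
*     props -> props.get("id"),    // unique feature ID taken from each row's properties
*     props -> props.get("type")); // value used as the source layer
* }</pre>
*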
* @param name string to use in stats and logs to identify this stage
* @param paths paths to the geoparquet files to read.
* @param hivePartitioning Set to true to parse extra feature tags from the file path, for example
* {@code {theme="buildings", type="part"}} from
* {@code base/theme=buildings/type=part/file.parquet}
* @param getId function that extracts a unique vector tile feature ID from each input feature; string or
* binary features will be hashed to a {@code long}.
* @param getLayer function that extracts {@link SourceFeature#getSourceLayer()} from the properties of each
* input feature
* @return this runner instance for chaining
* @see ParquetReader
*/
public Planetiler addParquetSource(String name, List<Path> paths, boolean hivePartitioning,
Function