
org.datasyslab.geospark.formatMapper.FormatMapper Maven / Gradle / Ivy
/*
* FILE: FormatMapper
* Copyright (c) 2015 - 2019 GeoSpark Development Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.datasyslab.geospark.formatMapper;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryCollection;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.MultiLineString;
import com.vividsolutions.jts.geom.MultiPoint;
import com.vividsolutions.jts.geom.MultiPolygon;
import com.vividsolutions.jts.io.ParseException;
import com.vividsolutions.jts.io.WKBReader;
import com.vividsolutions.jts.io.WKTReader;
import com.vividsolutions.jts.operation.valid.IsValidOp;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.datasyslab.geospark.enums.FileDataSplitter;
import org.datasyslab.geospark.enums.GeometryType;
import org.wololo.geojson.Feature;
import org.wololo.geojson.GeoJSONFactory;
import org.wololo.jts2geojson.GeoJSONReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class FormatMapper
implements Serializable, FlatMapFunction, T>
{
/**
* The start offset.
*/
protected final int startOffset;
/**
* The end offset.
*/
/* If the initial value is negative, GeoSpark will consider each field as a spatial attribute if the target object is LineString or Polygon. */
protected final int endOffset;
/**
* The splitter.
*/
protected final FileDataSplitter splitter;
/**
* The carry input data.
*/
protected final boolean carryInputData;
/**
* Non-spatial attributes in each input row will be concatenated to a tab separated string
*/
protected String otherAttributes = "";
protected GeometryType geometryType = null;
/**
* Allow mapping of invalid geometries.
*/
boolean allowTopologicallyInvalidGeometries;
/**
* Crash on syntactically invalid geometries or skip them.
*/
boolean skipSyntacticallyInvalidGeometries;
/**
* The factory.
*/
transient protected GeometryFactory factory = new GeometryFactory();
transient protected GeoJSONReader geoJSONReader = new GeoJSONReader();
transient protected WKTReader wktReader = new WKTReader();
// For some unknown reasons, the wkb reader cannot be used in transient variable like the wkt reader.
final static Logger logger = Logger.getLogger(FormatMapper.class);
/**
* Instantiates a new format mapper.
*
* @param startOffset the start offset
* @param endOffset the end offset
* @param splitter the splitter
* @param carryInputData the carry input data
*/
public FormatMapper(int startOffset, int endOffset, FileDataSplitter splitter, boolean carryInputData, GeometryType geometryType)
{
this.startOffset = startOffset;
this.endOffset = endOffset;
this.splitter = splitter;
this.carryInputData = carryInputData;
this.geometryType = geometryType;
this.allowTopologicallyInvalidGeometries = true;
this.skipSyntacticallyInvalidGeometries = false;
// Only the following formats are allowed to use this format mapper because each input has the geometry type definition
if (geometryType == null)
{
assert splitter == FileDataSplitter.WKB || splitter == FileDataSplitter.WKT || splitter == FileDataSplitter.GEOJSON;
}
}
/**
* Instantiates a new format mapper. This is extensively used in GeoSparkSQL.
* @param splitter
* @param carryInputData
*/
public FormatMapper(FileDataSplitter splitter, boolean carryInputData)
{
this(0,-1,splitter,carryInputData, null);
}
/**
* This format mapper is used in GeoSparkSQL.
* @param splitter
* @param carryInputData
* @param geometryType
*/
public FormatMapper(FileDataSplitter splitter, boolean carryInputData, GeometryType geometryType)
{
this(0, -1, splitter, carryInputData, geometryType);
}
private void readObject(ObjectInputStream inputStream)
throws IOException, ClassNotFoundException
{
inputStream.defaultReadObject();
factory = new GeometryFactory();
wktReader = new WKTReader();
geoJSONReader = new GeoJSONReader();
}
private void handleNonSpatialDataToGeometry(Geometry geometry, List splitedGeometryData)
{
LinkedList splitedGeometryDataList = new LinkedList(splitedGeometryData);
if (carryInputData) {
if (this.splitter != FileDataSplitter.GEOJSON){
//remove spatial data position
splitedGeometryDataList.remove(this.startOffset);
}
geometry.setUserData(String.join("\t", splitedGeometryDataList));
}
}
public Geometry readGeoJSON(String geoJson)
{
final Geometry geometry;
if (geoJson.contains("Feature")) {
Feature feature = (Feature) GeoJSONFactory.create(geoJson);
ArrayList nonSpatialData = new ArrayList<>();
Map featurePropertiesproperties = feature.getProperties();
if (feature.getId()!=null)
{
nonSpatialData.add(feature.getId().toString());
}
if (featurePropertiesproperties!=null)
{
for (Object property:featurePropertiesproperties.values()
) {
nonSpatialData.add(property.toString());
}
}
geometry = geoJSONReader.read(feature.getGeometry());
handleNonSpatialDataToGeometry(geometry, nonSpatialData);
}
else {
geometry = geoJSONReader.read(geoJson);
}
return geometry;
}
public static List readGeoJsonPropertyNames(String geoJson){
if (geoJson.contains("Feature") || geoJson.contains("feature") || geoJson.contains("FEATURE")) {
if (geoJson.contains("properties")) {
Feature feature = (Feature) GeoJSONFactory.create(geoJson);
return new ArrayList(feature.getProperties().keySet());
}
}
logger.warn("[GeoSpark] The GeoJSON file doesn't have feature properties");
return null;
}
public List readPropertyNames(String geoString) {
switch (splitter){
case GEOJSON:
return readGeoJsonPropertyNames(geoString);
default:
return null;
}
}
public Geometry readWkt(String line)
throws ParseException
{
final String[] columns = line.split(splitter.getDelimiter());
Geometry geometry = null;
try {
geometry = wktReader.read(columns[this.startOffset]);
}
catch (Exception e){
logger.error("[GeoSpark] " + e.getMessage());
}
if (geometry == null){
return null;
}
handleNonSpatialDataToGeometry(geometry, Arrays.asList(columns));
return geometry;
}
public Geometry readWkb(String line)
throws ParseException
{
final String[] columns = line.split(splitter.getDelimiter());
final byte[] aux = WKBReader.hexToBytes(columns[this.startOffset]);
// For some unknown reasons, the wkb reader cannot be used in transient variable like the wkt reader.
WKBReader wkbReader = new WKBReader();
final Geometry geometry = wkbReader.read(aux);
handleNonSpatialDataToGeometry(geometry, Arrays.asList(columns));
return geometry;
}
public Coordinate[] readCoordinates(String line)
{
final String[] columns = line.split(splitter.getDelimiter());
final int actualEndOffset = this.endOffset >= 0 ? this.endOffset : (this.geometryType == GeometryType.POINT? startOffset+1:columns.length - 1);
final Coordinate[] coordinates = new Coordinate[(actualEndOffset - startOffset + 1) / 2];
for (int i = this.startOffset; i <= actualEndOffset; i += 2) {
coordinates[(i - startOffset) / 2 ] = new Coordinate(Double.parseDouble(columns[i]), Double.parseDouble(columns[i + 1]));
}
if (carryInputData)
{
boolean firstColumnFlag = true;
otherAttributes = "";
for (int i= 0;i void addMultiGeometry(GeometryCollection multiGeometry, List result)
{
for (int i = 0; i < multiGeometry.getNumGeometries(); i++) {
T geometry = (T) multiGeometry.getGeometryN(i);
geometry.setUserData(multiGeometry.getUserData());
result.add(geometry);
}
}
public Geometry readGeometry(String line)
throws ParseException
{
Geometry geometry = null;
try {
switch (this.splitter) {
case WKT:
geometry = readWkt(line);
break;
case WKB:
geometry = readWkb(line);
break;
case GEOJSON:
geometry = readGeoJSON(line);
break;
default: {
if (this.geometryType == null) {
throw new IllegalArgumentException("[GeoSpark][FormatMapper] You must specify GeometryType when you use delimiter rather than WKB, WKT or GeoJSON");
} else {
geometry = createGeometry(readCoordinates(line), geometryType);
}
}
}
}
catch (Exception e){
logger.error("[GeoSpark] " + e.getMessage());
if (skipSyntacticallyInvalidGeometries == false){
throw e;
}
}
if (geometry == null){
return null;
}
if (allowTopologicallyInvalidGeometries == false) {
IsValidOp isvalidop = new IsValidOp(geometry);
if (isvalidop.isValid() == false) {
geometry = null;
}
}
return geometry;
}
private Geometry createGeometry(Coordinate[] coordinates, GeometryType geometryType)
{
GeometryFactory geometryFactory = new GeometryFactory();
Geometry geometry = null;
switch (geometryType) {
case POINT:
geometry = geometryFactory.createPoint(coordinates[0]);
break;
case POLYGON:
geometry = geometryFactory.createPolygon(coordinates);
break;
case LINESTRING:
geometry = geometryFactory.createLineString(coordinates);
break;
case RECTANGLE:
// The rectangle mapper reads two coordinates from the input line. The two coordinates are the two on the diagonal.
assert coordinates.length == 2;
Coordinate[] polyCoordinates = new Coordinate[5];
polyCoordinates[0] = coordinates[0];
polyCoordinates[1] = new Coordinate(coordinates[0].x, coordinates[1].y);
polyCoordinates[2] = coordinates[1];
polyCoordinates[3] = new Coordinate(coordinates[1].x, coordinates[0].y);
polyCoordinates[4] = polyCoordinates[0];
geometry = factory.createPolygon(polyCoordinates);
break;
// Read string to point if no geometry type specified but GeoSpark should never reach here
default:
geometry = geometryFactory.createPoint(coordinates[0]);
}
if (carryInputData)
{
geometry.setUserData(otherAttributes);
}
return geometry;
}
@Override
public Iterator call(Iterator stringIterator)
throws Exception
{
List result = new ArrayList<>();
while (stringIterator.hasNext()) {
String line = stringIterator.next();
addGeometry(readGeometry(line), result);
}
return result.iterator();
}
private void addGeometry(Geometry geometry, List result)
{
if (geometry == null) {
return;
}
if (geometry instanceof MultiPoint) {
addMultiGeometry((MultiPoint) geometry, result);
}
else if (geometry instanceof MultiLineString) {
addMultiGeometry((MultiLineString) geometry, result);
}
else if (geometry instanceof MultiPolygon) {
addMultiGeometry((MultiPolygon) geometry, result);
}
else {
result.add((T) geometry);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy