com.spatial4j.core.io.WktShapeParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spatial4j Show documentation
Show all versions of spatial4j Show documentation
Spatial4j is a general purpose spatial / geospatial ASL licensed open-source Java library. Its
core capabilities are 3-fold: to provide common geospatially-aware shapes, to provide distance
calculations and other math, and to read shape formats like WKT and GeoJSON.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.spatial4j.core.io;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * An extensible parser for Well Known Text (WKT).
 * The shapes supported by this class are:
 * <ul>
 *   <li>POINT</li>
 *   <li>MULTIPOINT</li>
 *   <li>ENVELOPE (strictly isn't WKT but is defined by OGC's Common Query Language (CQL))</li>
 *   <li>LINESTRING</li>
 *   <li>MULTILINESTRING</li>
 *   <li>GEOMETRYCOLLECTION</li>
 *   <li>BUFFER (non-standard Spatial4j operation)</li>
 * </ul>
 * <p>
 * 'EMPTY' is supported. Specifying 'Z', 'M', or any other dimensionality in the WKT is effectively
 * ignored. Thus, you can specify any number of numbers in the coordinate points but only the first
 * two take effect. The javadocs for the <code>parse___Shape</code> methods further describe these
 * shapes.
 * <p>
 * Most users of this class will call just one method: {@link #parse(String)}, or
 * {@link #parseIfSupported(String)} to not fail if it isn't parse-able.
 * <p>
 * To support more shapes, extend this class and override
 * {@link #parseShapeByType(WktShapeParser.State, String)}. It's also possible to delegate to
 * a WKTParser by also delegating {@link #newState(String)}.
 * <p>
 * Note, instances of this base class are threadsafe.
 */
public class WktShapeParser {

  //TODO support SRID, e.g. "SRID=4326;POINT(1 2)"

  //TODO should reference proposed ShapeFactory instead of ctx, which is a point of indirection that
  // might optionally do data validation & normalization
  protected final SpatialContext ctx;

  /**
   * This constructor is required by
   * {@link com.spatial4j.core.context.SpatialContextFactory#makeWktShapeParser(com.spatial4j.core.context.SpatialContext)}.
   *
   * @param ctx the context used to create the parsed shapes and to normalize their coordinates
   * @param factory not used by this base class
   */
  public WktShapeParser(SpatialContext ctx, SpatialContextFactory factory) {
    this.ctx = ctx;
  }

  /** Returns the context this parser creates its shapes with. */
  public SpatialContext getCtx() {
    return ctx;
  }

  /**
   * Parses the wktString, returning the defined Shape.
   *
   * @param wktString non-null, can be empty or have surrounding whitespace
   * @return Non-null Shape defined in the String
   * @throws ParseException Thrown if there is an error in the Shape definition, or if the
   * shape is unknown / unsupported (in which case the error offset is 0)
   */
  public Shape parse(String wktString) throws ParseException {
    Shape shape = parseIfSupported(wktString);
    if (shape != null)
      return shape;
    //keep the message bounded: quote at most 128 chars of the offending input
    String shortenedString =
        (wktString.length() <= 128 ? wktString : wktString.substring(0, 128-3)+"...");
    throw new ParseException("Unknown Shape definition [" + shortenedString + "]", 0);
  }

  /**
   * Parses the wktString, returning the defined Shape. If it can't because the
   * shape name is unknown or an empty or blank string was passed, then it returns null.
   * If the WKT starts with a supported shape but contains an inner unsupported shape then
   * it will result in a {@link ParseException}.
   *
   * @param wktString non-null, can be empty or have surrounding whitespace
   * @return Shape, null if unknown / unsupported shape.
   * @throws ParseException Thrown if there is an error in the Shape definition
   */
  public Shape parseIfSupported(String wktString) throws ParseException {
    State state = newState(wktString);
    state.nextIfWhitespace();//leading
    if (state.eof())
      return null;
    //shape types must start with a letter
    if (!Character.isLetter(state.rawString.charAt(state.offset)))
      return null;
    String shapeType = state.nextWord();
    Shape result = null;
    try {
      result = parseShapeByType(state, shapeType);
    } catch (ParseException e) {
      throw e;
    } catch (Exception e) {//most likely InvalidShapeException
      //report every failure as a ParseException, keeping the original as the cause
      ParseException pe = new ParseException(e.toString(), state.offset);
      pe.initCause(e);
      throw pe;
    }
    //a recognized shape must consume the whole input
    if (result != null && !state.eof())
      throw new ParseException("end of shape expected", state.offset);
    return result;
  }

  /** (internal) Creates a new State with the given String. It's only called by
   * {@link #parseIfSupported(String)}. This is an extension point for subclassing. */
  protected State newState(String wktString) {
    //NOTE: if we wanted to re-use old States to reduce object allocation, we might do that
    // here. But in the scheme of things, it doesn't seem worth the bother as it complicates the
    // thread-safety story of the API for too little of a gain.
    return new State(wktString);
  }

  /**
   * (internal) Parses the remainder of a shape definition following the shape's name
   * given as {@code shapeType} already consumed via
   * {@link State#nextWord()}. If
   * it's able to parse the shape, {@link WktShapeParser.State#offset}
   * should be advanced beyond
   * it (e.g. to the ',' or ')' or EOF in general). The default implementation
   * checks the name against some predefined names and calls corresponding
   * parse methods to handle the rest. Overriding this method is an
   * excellent extension point for additional shape types. Or, use this class by delegation to this
   * method.
   * <p>
   * When writing a parse method that reacts to a specific shape type, remember to handle the
   * dimension and EMPTY token via
   * {@link com.spatial4j.core.io.WktShapeParser.State#nextIfEmptyAndSkipZM()}.
   *
   * @param state the parse state, positioned after the shape type name and any whitespace
   * following it
   * @param shapeType Non-Null string; could have mixed case. The first character is a letter.
   * @return The shape or null if not supported / unknown.
   * @throws ParseException Thrown if there is an error in the Shape definition
   */
  protected Shape parseShapeByType(State state, String shapeType) throws ParseException {
    assert Character.isLetter(shapeType.charAt(0)) : "Shape must start with letter: "+shapeType;

    if (shapeType.equalsIgnoreCase("POINT")) {
      return parsePointShape(state);
    } else if (shapeType.equalsIgnoreCase("MULTIPOINT")) {
      return parseMultiPointShape(state);
    } else if (shapeType.equalsIgnoreCase("ENVELOPE")) {
      return parseEnvelopeShape(state);
    } else if (shapeType.equalsIgnoreCase("GEOMETRYCOLLECTION")) {
      return parseGeometryCollectionShape(state);
    } else if (shapeType.equalsIgnoreCase("LINESTRING")) {
      return parseLineStringShape(state);
    } else if (shapeType.equalsIgnoreCase("MULTILINESTRING")) {
      return parseMultiLineStringShape(state);
    }
    //extension
    if (shapeType.equalsIgnoreCase("BUFFER")) {
      return parseBufferShape(state);
    }

    // HEY! Update class Javadocs if add more shapes
    return null;
  }

  /**
   * Parses the BUFFER operation applied to a parsed shape.
   * <pre>
   *   '(' shape ',' number ')'
   * </pre>
   * Whereas 'number' is the distance to buffer the shape by.
   */
  protected Shape parseBufferShape(State state) throws ParseException {
    state.nextExpect('(');
    Shape shape = shape(state);
    state.nextExpect(',');
    double distance = normDist(state.nextDouble());
    state.nextExpect(')');
    return shape.getBuffered(distance, ctx);
  }

  /** Called to normalize a value that isn't X or Y. X and Y are normalized via
   * {@link com.spatial4j.core.context.SpatialContext#normX(double)} and normY.
   * This base implementation returns the value unchanged.
   */
  protected double normDist(double v) {//TODO should this be added to ctx?
    return v;
  }

  /**
   * Parses a POINT shape from the raw string.
   * <pre>
   *   '(' coordinate ')'
   * </pre>
   * An EMPTY point is returned as a point whose x and y are both NaN.
   *
   * @see #point(WktShapeParser.State)
   */
  protected Shape parsePointShape(State state) throws ParseException {
    if (state.nextIfEmptyAndSkipZM())
      return ctx.makePoint(Double.NaN, Double.NaN);
    state.nextExpect('(');
    Point coordinate = point(state);
    state.nextExpect(')');
    return coordinate;
  }

  /**
   * Parses a MULTIPOINT shape from the raw string -- a collection of points.
   * <pre>
   *   '(' coordinate (',' coordinate )* ')'
   * </pre>
   * Furthermore, coordinate can optionally be wrapped in parenthesis.
   *
   * @see #point(WktShapeParser.State)
   */
  protected Shape parseMultiPointShape(State state) throws ParseException {
    if (state.nextIfEmptyAndSkipZM())
      return ctx.makeCollection(Collections.<Shape>emptyList());
    List<Shape> shapes = new ArrayList<Shape>();
    state.nextExpect('(');
    do {
      //each coordinate may or may not have its own parenthesis
      boolean openParen = state.nextIf('(');
      Point coordinate = point(state);
      if (openParen)
        state.nextExpect(')');
      shapes.add(coordinate);
    } while (state.nextIf(','));
    state.nextExpect(')');
    return ctx.makeCollection(shapes);
  }

  /**
   * Parses an ENVELOPE (aka Rectangle) shape from the raw string. The values are normalized.
   * <p>
   * Source: OGC "Catalogue Services Specification", the "CQL" (Common Query Language) sub-spec.
   * <em>Note the inconsistent order of the min and max values between x and y!</em>
   * <pre>
   *   '(' x1 ',' x2 ',' y2 ',' y1 ')'
   * </pre>
   */
  protected Shape parseEnvelopeShape(State state) throws ParseException {
    //FYI no dimension or EMPTY
    state.nextExpect('(');
    double x1 = state.nextDouble();
    state.nextExpect(',');
    double x2 = state.nextDouble();
    state.nextExpect(',');
    double y2 = state.nextDouble();//y2 is read before y1, per the grammar above
    state.nextExpect(',');
    double y1 = state.nextDouble();
    state.nextExpect(')');
    return ctx.makeRectangle(ctx.normX(x1), ctx.normX(x2), ctx.normY(y1), ctx.normY(y2));
  }

  /**
   * Parses a LINESTRING shape from the raw string -- an ordered sequence of points.
   * <pre>
   *   coordinateSequence
   * </pre>
   *
   * @see #pointList(WktShapeParser.State)
   */
  protected Shape parseLineStringShape(State state) throws ParseException {
    if (state.nextIfEmptyAndSkipZM())
      return ctx.makeLineString(Collections.<Point>emptyList());
    List<Point> points = pointList(state);
    return ctx.makeLineString(points);
  }

  /**
   * Parses a MULTILINESTRING shape from the raw string -- a collection of line strings.
   * <pre>
   *   '(' coordinateSequence (',' coordinateSequence )* ')'
   * </pre>
   *
   * @see #parseLineStringShape(com.spatial4j.core.io.WktShapeParser.State)
   */
  protected Shape parseMultiLineStringShape(State state) throws ParseException {
    if (state.nextIfEmptyAndSkipZM())
      return ctx.makeCollection(Collections.<Shape>emptyList());
    List<Shape> shapes = new ArrayList<Shape>();
    state.nextExpect('(');
    do {
      shapes.add(parseLineStringShape(state));
    } while (state.nextIf(','));
    state.nextExpect(')');
    return ctx.makeCollection(shapes);
  }

  /**
   * Parses a GEOMETRYCOLLECTION shape from the raw string.
   * <pre>
   *   '(' shape (',' shape )* ')'
   * </pre>
   */
  protected Shape parseGeometryCollectionShape(State state) throws ParseException {
    if (state.nextIfEmptyAndSkipZM())
      return ctx.makeCollection(Collections.<Shape>emptyList());
    List<Shape> shapes = new ArrayList<Shape>();
    state.nextExpect('(');
    do {
      shapes.add(shape(state));
    } while (state.nextIf(','));
    state.nextExpect(')');
    return ctx.makeCollection(shapes);
  }

  /** Reads a shape from the current position, starting with the name of the shape. It
   * calls {@link #parseShapeByType(com.spatial4j.core.io.WktShapeParser.State, String)}
   * and throws an exception if the shape wasn't supported. */
  protected Shape shape(State state) throws ParseException {
    String type = state.nextWord();
    Shape shape = null;
    //nextWord() also accepts words starting with a digit, '_' or '$', but parseShapeByType
    // requires (and asserts) a leading letter. Such a word can never name a shape, so it is
    // reported as unknown below instead of tripping the assertion.
    if (Character.isLetter(type.charAt(0)))
      shape = parseShapeByType(state, type);
    if (shape == null)
      throw new ParseException("Shape of type "+type+" is unknown", state.offset);
    return shape;
  }

  /**
   * Reads a list of Points (AKA CoordinateSequence) from the current position.
   * <pre>
   *   '(' coordinate (',' coordinate )* ')'
   * </pre>
   *
   * @see #point(WktShapeParser.State)
   */
  protected List<Point> pointList(State state) throws ParseException {
    List<Point> sequence = new ArrayList<Point>();
    state.nextExpect('(');
    do {
      sequence.add(point(state));
    } while (state.nextIf(','));
    state.nextExpect(')');
    return sequence;
  }

  /**
   * Reads a raw Point (AKA Coordinate) from the current position. Only the first 2 numbers are
   * used. The values are normalized.
   * <pre>
   *   number number number*
   * </pre>
   */
  protected Point point(State state) throws ParseException {
    double x = state.nextDouble();
    double y = state.nextDouble();
    state.skipNextDoubles();//ignore any further dimensions (e.g. Z, M)
    return ctx.makePoint(ctx.normX(x), ctx.normY(y));
  }

  /** The parse state. */
  public class State {
    /** The full string being parsed; set by the constructor. */
    public String rawString;
    /** Offset of the next char in {@link #rawString} to be read. */
    public int offset;
    /** Dimensionality specifier (e.g. 'Z', or 'M') following a shape type name. */
    public String dimension;

    /** Creates a state for {@code rawString}; {@link #offset} starts at 0. */
    public State(String rawString) {
      this.rawString = rawString;
    }

    /** Returns the context of the enclosing parser. */
    public SpatialContext getCtx() { return ctx; }

    /** Returns the enclosing parser. */
    public WktShapeParser getParser() { return WktShapeParser.this; }

    /**
     * Reads the word starting at the current character position. The word
     * terminates once {@link Character#isJavaIdentifierPart(char)} returns false (or EOF).
     * {@link #offset} is advanced past whitespace.
     *
     * @return Non-null non-empty String.
     * @throws ParseException if there is no word at the current position
     */
    public String nextWord() throws ParseException {
      int startOffset = offset;
      while (offset < rawString.length() &&
          Character.isJavaIdentifierPart(rawString.charAt(offset))) {
        offset++;
      }
      if (startOffset == offset)
        throw new ParseException("Word expected", startOffset);
      String result = rawString.substring(startOffset, offset);
      nextIfWhitespace();
      return result;
    }

    /**
     * Skips over a dimensionality token (e.g. 'Z' or 'M') if found, storing in
     * {@link #dimension}, and then looks for EMPTY, consuming that and whitespace.
     * <pre>
     *   dimensionToken? 'EMPTY'?
     * </pre>
     *
     * @return True if EMPTY was found.
     * @throws ParseException if a dimension token is followed by a word other than EMPTY
     */
    public boolean nextIfEmptyAndSkipZM() throws ParseException {
      if (!atWordStart())
        return false;
      String word = nextWord();
      if (word.equalsIgnoreCase("EMPTY"))
        return true;
      //we figure this word is Z or ZM or some other dimensionality signifier. We skip it.
      this.dimension = word;
      if (!atWordStart())
        return false;
      word = nextWord();
      if (word.equalsIgnoreCase("EMPTY"))
        return true;
      throw new ParseException("Expected EMPTY because found dimension; but got ["+word+"]",
          offset);
    }

    /** True if not at EOF and the current character could start a word for {@link #nextWord()}. */
    private boolean atWordStart() {
      return !eof() && Character.isJavaIdentifierPart(rawString.charAt(offset));
    }

    /**
     * Reads in a double from the String. Parses digits with an optional decimal, sign, or exponent.
     * NaN and Infinity are not supported.
     * {@link #offset} is advanced past whitespace.
     *
     * @return Double value
     * @throws ParseException if there is no number at the current position or it is malformed
     */
    public double nextDouble() throws ParseException {
      int startOffset = offset;
      skipDouble();
      if (startOffset == offset)
        throw new ParseException("Expected a number", offset);
      double result;
      try {
        result = Double.parseDouble(rawString.substring(startOffset, offset));
      } catch (NumberFormatException e) {
        //skipDouble() is lenient (e.g. it accepts "1e" or "+-1"); keep the real reason as the cause
        ParseException pe = new ParseException(e.toString(), offset);
        pe.initCause(e);
        throw pe;
      }
      nextIfWhitespace();
      return result;
    }

    /** Advances offset forward until it points to a character that isn't part of a number. */
    public void skipDouble() {
      int startOffset = offset;
      for (; offset < rawString.length(); offset++) {
        char c = rawString.charAt(offset);
        if (!(Character.isDigit(c) || c == '.' || c == '-' || c == '+')) {
          //'e' is okay as long as it isn't first
          if (offset != startOffset && (c == 'e' || c == 'E'))
            continue;
          break;
        }
      }
    }

    /** Advances past as many doubles as there are, with intervening whitespace. */
    public void skipNextDoubles() {
      while (!eof()) {
        int startOffset = offset;
        skipDouble();
        if (startOffset == offset)//nothing number-like here; done
          return;
        nextIfWhitespace();
      }
    }

    /**
     * Verifies that the current character is of the expected value.
     * If the character is the expected value, then it is consumed and
     * {@link #offset} is advanced past whitespace.
     *
     * @param expected The expected char.
     * @throws ParseException if at EOF or the current character isn't {@code expected}
     */
    public void nextExpect(char expected) throws ParseException {
      if (eof())
        throw new ParseException("Expected [" + expected + "] found EOF", offset);
      char c = rawString.charAt(offset);
      if (c != expected)
        throw new ParseException("Expected [" + expected + "] found [" + c + "]", offset);
      offset++;
      nextIfWhitespace();
    }

    /** If the string is consumed, i.e. at end-of-file. */
    public final boolean eof() {
      return offset >= rawString.length();
    }

    /**
     * If the current character is {@code expected}, then offset is advanced after it and any
     * subsequent whitespace. Otherwise, false is returned.
     *
     * @param expected The expected char
     * @return true if consumed
     */
    public boolean nextIf(char expected) {
      if (!eof() && rawString.charAt(offset) == expected) {
        offset++;
        nextIfWhitespace();
        return true;
      }
      return false;
    }

    /**
     * Moves offset to next non-whitespace character. Doesn't move if the offset is already at
     * non-whitespace. <em>There is very little reason for subclasses to call this because
     * most other parsing methods call it.</em>
     */
    public void nextIfWhitespace() {
      for (; offset < rawString.length(); offset++) {
        if (!Character.isWhitespace(rawString.charAt(offset))) {
          return;
        }
      }
    }

    /**
     * Returns the next chunk of text till the next ',' or ')' (non-inclusive)
     * or EOF. If a '(' is encountered, then it looks past its matching ')',
     * taking care to handle nested matching parenthesis too. It's designed to be
     * of use to subclasses that wish to get the entire subshape at the current
     * position as a string so that it might be passed to other software that
     * will parse it.
     * <p>
     * Example:
     * <pre>
     *   OUTER(INNER(3, 5))
     * </pre>
     * If this is called when offset is at the first character, then it will
     * return this whole string. If called at the "I" then it will return
     * "INNER(3, 5)". If called at "3", then it will return "3". In all cases,
     * offset will be positioned at the next position following the returned
     * substring.
     *
     * @return non-null substring.
     * @throws ParseException if a '(' is never closed before EOF
     */
    public String nextSubShapeString() throws ParseException {
      int startOffset = offset;
      int parenStack = 0;//how many parenthesis levels are we in?
      for (; offset < rawString.length(); offset++) {
        char c = rawString.charAt(offset);
        if (c == ',') {
          if (parenStack == 0)
            break;
        } else if (c == ')') {
          if (parenStack == 0)
            break;
          parenStack--;
        } else if (c == '(') {
          parenStack++;
        }
      }
      if (parenStack != 0)
        throw new ParseException("Unbalanced parenthesis", startOffset);
      return rawString.substring(startOffset, offset);
    }

  }//class State

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy