All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spatial4j.core.io.WktShapeParser Maven / Gradle / Ivy

Go to download

Spatial4j is a general purpose spatial / geospatial ASL licensed open-source Java library. Its core capabilities are 3-fold: to provide common geospatially-aware shapes, to provide distance calculations and other math, and to read shape formats like WKT and GeoJSON.

There is a newer version: 0.5
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.spatial4j.core.io;


import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * An extensible parser for 
 * Well Known Text (WKT).
 * The shapes supported by this class are:
 * 
    *
  • POINT
  • *
  • MULTIPOINT
  • *
  • ENVELOPE
  • (strictly isn't WKT but is defined by OGC's * Common Query Language (CQL)) *
  • LINESTRING
  • *
  • MULTILINESTRING
  • *
  • GEOMETRYCOLLECTION
  • *
  • BUFFER
  • (non-standard Spatial4j operation) *
* 'EMPTY' is supported. Specifying 'Z', 'M', or any other dimensionality in the WKT is effectively * ignored. Thus, you can specify any number of numbers in the coordinate points but only the first * two take effect. The javadocs for the parse___Shape methods further describe these * shapes, or you * *

* Most users of this class will call just one method: {@link #parse(String)}, or * {@link #parseIfSupported(String)} to not fail if it isn't parse-able. * *

* To support more shapes, extend this class and override * {@link #parseShapeByType(WktShapeParser.State, String)}. It's also possible to delegate to * a WKTParser by also delegating {@link #newState(String)}. * *

* Note, instances of this base class are threadsafe. */ public class WktShapeParser { //TODO support SRID: "SRID=4326;pointPOINT(1,2) //TODO should reference proposed ShapeFactory instead of ctx, which is a point of indirection that // might optionally do data validation & normalization protected final SpatialContext ctx; /** This constructor is required by {@link com.spatial4j.core.context.SpatialContextFactory#makeWktShapeParser(com.spatial4j.core.context.SpatialContext)}. */ public WktShapeParser(SpatialContext ctx, SpatialContextFactory factory) { this.ctx = ctx; } public SpatialContext getCtx() { return ctx; } /** * Parses the wktString, returning the defined Shape. * * @return Non-null Shape defined in the String * @throws ParseException Thrown if there is an error in the Shape definition */ public Shape parse(String wktString) throws ParseException { Shape shape = parseIfSupported(wktString);//sets rawString & offset if (shape != null) return shape; String shortenedString = (wktString.length() <= 128 ? wktString : wktString.substring(0, 128-3)+"..."); throw new ParseException("Unknown Shape definition [" + shortenedString + "]", 0); } /** * Parses the wktString, returning the defined Shape. If it can't because the * shape name is unknown or an empty or blank string was passed, then it returns null. * If the WKT starts with a supported shape but contains an inner unsupported shape then * it will result in a {@link ParseException}. * * @param wktString non-null, can be empty or have surrounding whitespace * @return Shape, null if unknown / unsupported shape. 
* @throws ParseException Thrown if there is an error in the Shape definition */ public Shape parseIfSupported(String wktString) throws ParseException { State state = newState(wktString); state.nextIfWhitespace();//leading if (state.eof()) return null; //shape types must start with a letter if (!Character.isLetter(state.rawString.charAt(state.offset))) return null; String shapeType = state.nextWord(); Shape result = null; try { result = parseShapeByType(state, shapeType); } catch (ParseException e) { throw e; } catch (Exception e) {//most likely InvalidShapeException ParseException pe = new ParseException(e.toString(), state.offset); pe.initCause(e); throw pe; } if (result != null && !state.eof()) throw new ParseException("end of shape expected", state.offset); return result; } /** (internal) Creates a new State with the given String. It's only called by * {@link #parseIfSupported(String)}. This is an extension point for subclassing. */ protected State newState(String wktString) { //NOTE: if we wanted to re-use old States to reduce object allocation, we might do that // here. But in the scheme of things, it doesn't seem worth the bother as it complicates the // thread-safety story of the API for too little of a gain. return new State(wktString); } /** * (internal) Parses the remainder of a shape definition following the shape's name * given as {@code shapeType} already consumed via * {@link State#nextWord()}. If * it's able to parse the shape, {@link WktShapeParser.State#offset} * should be advanced beyond * it (e.g. to the ',' or ')' or EOF in general). The default implementation * checks the name against some predefined names and calls corresponding * parse methods to handle the rest. Overriding this method is an * excellent extension point for additional shape types. Or, use this class by delegation to this * method. *

* When writing a parse method that reacts to a specific shape type, remember to handle the * dimension and EMPTY token via * {@link com.spatial4j.core.io.WktShapeParser.State#nextIfEmptyAndSkipZM()}. * * @param state * @param shapeType Non-Null string; could have mixed case. The first character is a letter. * @return The shape or null if not supported / unknown. */ protected Shape parseShapeByType(State state, String shapeType) throws ParseException { assert Character.isLetter(shapeType.charAt(0)) : "Shape must start with letter: "+shapeType; if (shapeType.equalsIgnoreCase("POINT")) { return parsePointShape(state); } else if (shapeType.equalsIgnoreCase("MULTIPOINT")) { return parseMultiPointShape(state); } else if (shapeType.equalsIgnoreCase("ENVELOPE")) { return parseEnvelopeShape(state); } else if (shapeType.equalsIgnoreCase("GEOMETRYCOLLECTION")) { return parseGeometryCollectionShape(state); } else if (shapeType.equalsIgnoreCase("LINESTRING")) { return parseLineStringShape(state); } else if (shapeType.equalsIgnoreCase("MULTILINESTRING")) { return parseMultiLineStringShape(state); } //extension if (shapeType.equalsIgnoreCase("BUFFER")) { return parseBufferShape(state); } // HEY! Update class Javadocs if add more shapes return null; } /** * Parses the BUFFER operation applied to a parsed shape. *

   *   '(' shape ',' number ')'
   * 
* Whereas 'number' is the distance to buffer the shape by. */ protected Shape parseBufferShape(State state) throws ParseException { state.nextExpect('('); Shape shape = shape(state); state.nextExpect(','); double distance = normDist(state.nextDouble()); state.nextExpect(')'); return shape.getBuffered(distance, ctx); } /** Called to normalize a value that isn't X or Y. X & Y or normalized via * {@link com.spatial4j.core.context.SpatialContext#normX(double)} & normY. */ protected double normDist(double v) {//TODO should this be added to ctx? return v; } /** * Parses a POINT shape from the raw string. *
   *   '(' coordinate ')'
   * 
* * @see #point(WktShapeParser.State) */ protected Shape parsePointShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return ctx.makePoint(Double.NaN, Double.NaN); state.nextExpect('('); Point coordinate = point(state); state.nextExpect(')'); return coordinate; } /** * Parses a MULTIPOINT shape from the raw string -- a collection of points. *
   *   '(' coordinate (',' coordinate )* ')'
   * 
* Furthermore, coordinate can optionally be wrapped in parenthesis. * * @see #point(WktShapeParser.State) */ protected Shape parseMultiPointShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return ctx.makeCollection(Collections.EMPTY_LIST); List shapes = new ArrayList(); state.nextExpect('('); do { boolean openParen = state.nextIf('('); Point coordinate = point(state); if (openParen) state.nextExpect(')'); shapes.add(coordinate); } while (state.nextIf(',')); state.nextExpect(')'); return ctx.makeCollection(shapes); } /** * Parses an ENVELOPE (aka Rectangle) shape from the raw string. The values are normalized. *

* Source: OGC "Catalogue Services Specification", the "CQL" (Common Query Language) sub-spec. * Note the inconsistent order of the min & max values between x & y! *

   *   '(' x1 ',' x2 ',' y2 ',' y1 ')'
   * 
*/ protected Shape parseEnvelopeShape(State state) throws ParseException { //FYI no dimension or EMPTY state.nextExpect('('); double x1 = state.nextDouble(); state.nextExpect(','); double x2 = state.nextDouble(); state.nextExpect(','); double y2 = state.nextDouble(); state.nextExpect(','); double y1 = state.nextDouble(); state.nextExpect(')'); return ctx.makeRectangle(ctx.normX(x1), ctx.normX(x2), ctx.normY(y1), ctx.normY(y2)); } /** * Parses a LINESTRING shape from the raw string -- an ordered sequence of points. *
   *   coordinateSequence
   * 
* * @see #pointList(WktShapeParser.State) */ protected Shape parseLineStringShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return ctx.makeLineString(Collections.emptyList()); List points = pointList(state); return ctx.makeLineString(points); } /** * Parses a MULTILINESTRING shape from the raw string -- a collection of line strings. *
   *   '(' coordinateSequence (',' coordinateSequence )* ')'
   * 
* * @see #parseLineStringShape(com.spatial4j.core.io.WktShapeParser.State) */ protected Shape parseMultiLineStringShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return ctx.makeCollection(Collections.EMPTY_LIST); List shapes = new ArrayList(); state.nextExpect('('); do { shapes.add(parseLineStringShape(state)); } while (state.nextIf(',')); state.nextExpect(')'); return ctx.makeCollection(shapes); } /** * Parses a GEOMETRYCOLLECTION shape from the raw string. *
   *   '(' shape (',' shape )* ')'
   * 
*/ protected Shape parseGeometryCollectionShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return ctx.makeCollection(Collections.EMPTY_LIST); List shapes = new ArrayList(); state.nextExpect('('); do { shapes.add(shape(state)); } while (state.nextIf(',')); state.nextExpect(')'); return ctx.makeCollection(shapes); } /** Reads a shape from the current position, starting with the name of the shape. It * calls {@link #parseShapeByType(com.spatial4j.core.io.WktShapeParser.State, String)} * and throws an exception if the shape wasn't supported. */ protected Shape shape(State state) throws ParseException { String type = state.nextWord(); Shape shape = parseShapeByType(state, type); if (shape == null) throw new ParseException("Shape of type "+type+" is unknown", state.offset); return shape; } /** * Reads a list of Points (AKA CoordinateSequence) from the current position. *
   *   '(' coordinate (',' coordinate )* ')'
   * 
* * @see #point(WktShapeParser.State) */ protected List pointList(State state) throws ParseException { List sequence = new ArrayList(); state.nextExpect('('); do { sequence.add(point(state)); } while (state.nextIf(',')); state.nextExpect(')'); return sequence; } /** * Reads a raw Point (AKA Coordinate) from the current position. Only the first 2 numbers are * used. The values are normalized. *
   *   number number number*
   * 
*/ protected Point point(State state) throws ParseException { double x = state.nextDouble(); double y = state.nextDouble(); state.skipNextDoubles(); return ctx.makePoint(ctx.normX(x), ctx.normY(y)); } /** The parse state. */ public class State { /** Set in {@link #parseIfSupported(String)}. */ public String rawString; /** Offset of the next char in {@link #rawString} to be read. */ public int offset; /** Dimensionality specifier (e.g. 'Z', or 'M') following a shape type name. */ public String dimension; public State(String rawString) { this.rawString = rawString; } public SpatialContext getCtx() { return ctx; } public WktShapeParser getParser() { return WktShapeParser.this; } /** * Reads the word starting at the current character position. The word * terminates once {@link Character#isJavaIdentifierPart(char)} returns false (or EOF). * {@link #offset} is advanced past whitespace. * * @return Non-null non-empty String. */ public String nextWord() throws ParseException { int startOffset = offset; while (offset < rawString.length() && Character.isJavaIdentifierPart(rawString.charAt(offset))) { offset++; } if (startOffset == offset) throw new ParseException("Word expected", startOffset); String result = rawString.substring(startOffset, offset); nextIfWhitespace(); return result; } /** * Skips over a dimensionality token (e.g. 'Z' or 'M') if found, storing in * {@link #dimension}, and then looks for EMPTY, consuming that and whitespace. *
     *   dimensionToken? 'EMPTY'?
     * 
* @return True if EMPTY was found. */ public boolean nextIfEmptyAndSkipZM() throws ParseException { if (eof()) return false; char c = rawString.charAt(offset); if (c == '(' || !Character.isJavaIdentifierPart(c)) return false; String word = nextWord(); if (word.equalsIgnoreCase("EMPTY")) return true; //we figure this word is Z or ZM or some other dimensionality signifier. We skip it. this.dimension = word; if (eof()) return false; c = rawString.charAt(offset); if (c == '(' || !Character.isJavaIdentifierPart(c)) return false; word = nextWord(); if (word.equalsIgnoreCase("EMPTY")) return true; throw new ParseException("Expected EMPTY because found dimension; but got ["+word+"]", offset); } /** * Reads in a double from the String. Parses digits with an optional decimal, sign, or exponent. * NaN and Infinity are not supported. * {@link #offset} is advanced past whitespace. * * @return Double value */ public double nextDouble() throws ParseException { int startOffset = offset; skipDouble(); if (startOffset == offset) throw new ParseException("Expected a number", offset); double result; try { result = Double.parseDouble(rawString.substring(startOffset, offset)); } catch (Exception e) { throw new ParseException(e.toString(), offset); } nextIfWhitespace(); return result; } /** Advances offset forward until it points to a character that isn't part of a number. */ public void skipDouble() { int startOffset = offset; for (; offset < rawString.length(); offset++) { char c = rawString.charAt(offset); if (!(Character.isDigit(c) || c == '.' || c == '-' || c == '+')) { //'e' is okay as long as it isn't first if (offset != startOffset && (c == 'e' || c == 'E')) continue; break; } } } /** Advances past as many doubles as there are, with intervening whitespace. */ public void skipNextDoubles() { while (!eof()) { int startOffset = offset; skipDouble(); if (startOffset == offset) return; nextIfWhitespace(); } } /** * Verifies that the current character is of the expected value. 
* If the character is the expected value, then it is consumed and * {@link #offset} is advanced past whitespace. * * @param expected The expected char. */ public void nextExpect(char expected) throws ParseException { if (eof()) throw new ParseException("Expected [" + expected + "] found EOF", offset); char c = rawString.charAt(offset); if (c != expected) throw new ParseException("Expected [" + expected + "] found [" + c + "]", offset); offset++; nextIfWhitespace(); } /** If the string is consumed, i.e. at end-of-file. */ public final boolean eof() { return offset >= rawString.length(); } /** * If the current character is {@code expected}, then offset is advanced after it and any * subsequent whitespace. Otherwise, false is returned. * * @param expected The expected char * @return true if consumed */ public boolean nextIf(char expected) { if (!eof() && rawString.charAt(offset) == expected) { offset++; nextIfWhitespace(); return true; } return false; } /** * Moves offset to next non-whitespace character. Doesn't move if the offset is already at * non-whitespace. There is very little reason for subclasses to call this because * most other parsing methods call it. */ public void nextIfWhitespace() { for (; offset < rawString.length(); offset++) { if (!Character.isWhitespace(rawString.charAt(offset))) { return; } } } /** * Returns the next chunk of text till the next ',' or ')' (non-inclusive) * or EOF. If a '(' is encountered, then it looks past its matching ')', * taking care to handle nested matching parenthesis too. It's designed to be * of use to subclasses that wish to get the entire subshape at the current * position as a string so that it might be passed to other software that * will parse it. *

* Example: *

     *   OUTER(INNER(3, 5))
     * 
* If this is called when offset is at the first character, then it will * return this whole string. If called at the "I" then it will return * "INNER(3, 5)". If called at "3", then it will return "3". In all cases, * offset will be positioned at the next position following the returned * substring. * * @return non-null substring. */ public String nextSubShapeString() throws ParseException { int startOffset = offset; int parenStack = 0;//how many parenthesis levels are we in? for (; offset < rawString.length(); offset++) { char c = rawString.charAt(offset); if (c == ',') { if (parenStack == 0) break; } else if (c == ')') { if (parenStack == 0) break; parenStack--; } else if (c == '(') { parenStack++; } } if (parenStack != 0) throw new ParseException("Unbalanced parenthesis", startOffset); return rawString.substring(startOffset, offset); } }//class State }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy