All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.geo.SimpleWKTShapeParser Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.geo;

import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Parses shape geometry represented in WKT format
 *
 * 

complies with OGC® document: 12-063r5 and ISO/IEC 13249-3:2016 standard located at * http://docs.opengeospatial.org/is/12-063r5/12-063r5.html */ public class SimpleWKTShapeParser { public static final String EMPTY = "EMPTY"; public static final String SPACE = " "; public static final String LPAREN = "("; public static final String RPAREN = ")"; public static final String COMMA = ","; public static final String NAN = "NaN"; private static final String NUMBER = ""; private static final String EOF = "END-OF-STREAM"; private static final String EOL = "END-OF-LINE"; // no instance private SimpleWKTShapeParser() {} public static Object parse(String wkt) throws IOException, ParseException { return parseExpectedType(wkt, null); } public static Object parseExpectedType(String wkt, final ShapeType shapeType) throws IOException, ParseException { try (StringReader reader = new StringReader(wkt)) { // set up the tokenizer; configured to read words w/o numbers StreamTokenizer tokenizer = new StreamTokenizer(reader); tokenizer.resetSyntax(); tokenizer.wordChars('a', 'z'); tokenizer.wordChars('A', 'Z'); tokenizer.wordChars(128 + 32, 255); tokenizer.wordChars('0', '9'); tokenizer.wordChars('-', '-'); tokenizer.wordChars('+', '+'); tokenizer.wordChars('.', '.'); tokenizer.whitespaceChars(0, ' '); tokenizer.commentChar('#'); Object geometry = parseGeometry(tokenizer, shapeType); checkEOF(tokenizer); return geometry; } } /** parse geometry from the stream tokenizer */ private static Object parseGeometry(StreamTokenizer stream, ShapeType shapeType) throws IOException, ParseException { final ShapeType type = ShapeType.forName(nextWord(stream)); if (shapeType != null && shapeType != ShapeType.GEOMETRYCOLLECTION) { if (type.wktName().equals(shapeType.wktName()) == false) { throw new ParseException( "Expected geometry type: [" + shapeType + "], but found: [" + type + "]", stream.lineno()); } } switch (type) { case POINT: return parsePoint(stream); case MULTIPOINT: return parseMultiPoint(stream); case LINESTRING: return parseLine(stream); case MULTILINESTRING: return parseMultiLine(stream); case POLYGON: return parsePolygon(stream); case MULTIPOLYGON: return parseMultiPolygon(stream); case ENVELOPE: return parseBBox(stream); case GEOMETRYCOLLECTION: return parseGeometryCollection(stream); default: throw new IllegalArgumentException("Unknown geometry type: " + type); } } /** Parses a point as a double array */ private static double[] parsePoint(StreamTokenizer stream) throws IOException, ParseException { if (nextEmptyOrOpen(stream).equals(EMPTY)) { return null; } double[] pt = new double[] {nextNumber(stream), nextNumber(stream)}; if (isNumberNext(stream)) { nextNumber(stream); } nextCloser(stream); return pt; } /** Parses a list of points into latitude and longitude arraylists */ private static void parseCoordinates( StreamTokenizer stream, ArrayList lats, ArrayList lons) throws IOException, ParseException { boolean isOpenParen = false; if (isNumberNext(stream) || (isOpenParen = nextWord(stream).equals(LPAREN))) { parseCoordinate(stream, lats, lons); } while (nextCloserOrComma(stream).equals(COMMA)) { isOpenParen = false; if (isNumberNext(stream) || (isOpenParen = nextWord(stream).equals(LPAREN))) { parseCoordinate(stream, lats, lons); } if (isOpenParen && nextCloser(stream).equals(RPAREN) == false) { throw new ParseException( "expected: [" + RPAREN + "] but found: [" + tokenString(stream) + "]", stream.lineno()); } } if (isOpenParen && nextCloser(stream).equals(RPAREN) == false) { throw new ParseException( "expected: [" + RPAREN + "] but found: [" + tokenString(stream) + "]", stream.lineno()); } } /** parses a single coordinate, w/ optional 3rd dimension */ private static void parseCoordinate( StreamTokenizer stream, ArrayList lats, ArrayList lons) throws IOException, ParseException { lons.add(nextNumber(stream)); lats.add(nextNumber(stream)); if (isNumberNext(stream)) { nextNumber(stream); } } /** parses a MULTIPOINT type */ private static double[][] parseMultiPoint(StreamTokenizer stream) throws IOException, ParseException { String token = nextEmptyOrOpen(stream); if (token.equals(EMPTY)) { return null; } ArrayList lats = new ArrayList<>(); ArrayList lons = new ArrayList<>(); parseCoordinates(stream, lats, lons); double[][] result = new double[lats.size()][2]; for (int i = 0; i < lats.size(); ++i) { result[i] = new double[] {lons.get(i), lats.get(i)}; } return result; } /** parses a LINESTRING */ private static Line parseLine(StreamTokenizer stream) throws IOException, ParseException { String token = nextEmptyOrOpen(stream); if (token.equals(EMPTY)) { return null; } ArrayList lats = new ArrayList<>(); ArrayList lons = new ArrayList<>(); parseCoordinates(stream, lats, lons); return new Line( lats.stream().mapToDouble(i -> i).toArray(), lons.stream().mapToDouble(i -> i).toArray()); } /** parses a MULTILINESTRING */ private static Line[] parseMultiLine(StreamTokenizer stream) throws IOException, ParseException { String token = nextEmptyOrOpen(stream); if (token.equals(EMPTY)) { return null; } ArrayList lines = new ArrayList<>(); lines.add(parseLine(stream)); while (nextCloserOrComma(stream).equals(COMMA)) { lines.add(parseLine(stream)); } return lines.toArray(new Line[0]); } /** parses the hole of a polygon */ private static Polygon parsePolygonHole(StreamTokenizer stream) throws IOException, ParseException { ArrayList lats = new ArrayList<>(); ArrayList lons = new ArrayList<>(); parseCoordinates(stream, lats, lons); return new Polygon( lats.stream().mapToDouble(i -> i).toArray(), lons.stream().mapToDouble(i -> i).toArray()); } /** parses a POLYGON */ private static Polygon parsePolygon(StreamTokenizer stream) throws IOException, ParseException { if (nextEmptyOrOpen(stream).equals(EMPTY)) { return null; } nextOpener(stream); ArrayList lats = new ArrayList<>(); ArrayList lons = new ArrayList<>(); parseCoordinates(stream, lats, lons); ArrayList holes = new ArrayList<>(); while (nextCloserOrComma(stream).equals(COMMA)) { holes.add(parsePolygonHole(stream)); } if (holes.isEmpty() == false) { return new Polygon( lats.stream().mapToDouble(i -> i).toArray(), lons.stream().mapToDouble(i -> i).toArray(), holes.toArray(new Polygon[0])); } return new Polygon( lats.stream().mapToDouble(i -> i).toArray(), lons.stream().mapToDouble(i -> i).toArray()); } /** parses a MULTIPOLYGON */ private static Polygon[] parseMultiPolygon(StreamTokenizer stream) throws IOException, ParseException { String token = nextEmptyOrOpen(stream); if (token.equals(EMPTY)) { return null; } ArrayList polygons = new ArrayList<>(); polygons.add(parsePolygon(stream)); while (nextCloserOrComma(stream).equals(COMMA)) { polygons.add(parsePolygon(stream)); } return polygons.toArray(new Polygon[0]); } /** parses an ENVELOPE */ private static Rectangle parseBBox(StreamTokenizer stream) throws IOException, ParseException { if (nextEmptyOrOpen(stream).equals(EMPTY)) { return null; } double minLon = nextNumber(stream); nextComma(stream); double maxLon = nextNumber(stream); nextComma(stream); double maxLat = nextNumber(stream); nextComma(stream); double minLat = nextNumber(stream); nextCloser(stream); return new Rectangle(minLat, maxLat, minLon, maxLon); } /** parses a GEOMETRYCOLLECTION */ private static Object[] parseGeometryCollection(StreamTokenizer stream) throws IOException, ParseException { if (nextEmptyOrOpen(stream).equals(EMPTY)) { return null; } ArrayList geometries = new ArrayList<>(); geometries.add(parseGeometry(stream, ShapeType.GEOMETRYCOLLECTION)); while (nextCloserOrComma(stream).equals(COMMA)) { geometries.add(parseGeometry(stream, null)); } return geometries.toArray(new Object[0]); } /** next word in the stream */ private static String nextWord(StreamTokenizer stream) throws ParseException, IOException { switch (stream.nextToken()) { case StreamTokenizer.TT_WORD: final String word = stream.sval; return word.equalsIgnoreCase(EMPTY) ? EMPTY : word; case '(': return LPAREN; case ')': return RPAREN; case ',': return COMMA; } throw new ParseException("expected word but found: " + tokenString(stream), stream.lineno()); } /** next number in the stream */ private static double nextNumber(StreamTokenizer stream) throws IOException, ParseException { if (stream.nextToken() == StreamTokenizer.TT_WORD) { if (stream.sval.equalsIgnoreCase(NAN)) { return Double.NaN; } else { try { return Double.parseDouble(stream.sval); } catch ( @SuppressWarnings("unused") NumberFormatException e) { throw new ParseException("invalid number found: " + stream.sval, stream.lineno()); } } } throw new ParseException("expected number but found: " + tokenString(stream), stream.lineno()); } /** next token in the stream */ private static String tokenString(StreamTokenizer stream) { switch (stream.ttype) { case StreamTokenizer.TT_WORD: return stream.sval; case StreamTokenizer.TT_EOF: return EOF; case StreamTokenizer.TT_EOL: return EOL; case StreamTokenizer.TT_NUMBER: return NUMBER; } return "'" + (char) stream.ttype + "'"; } /** checks if the next token is a number */ private static boolean isNumberNext(StreamTokenizer stream) throws IOException { final int type = stream.nextToken(); stream.pushBack(); return type == StreamTokenizer.TT_WORD; } /** checks if next token is an EMPTY or open paren */ private static String nextEmptyOrOpen(StreamTokenizer stream) throws IOException, ParseException { final String next = nextWord(stream); if (next.equals(EMPTY) || next.equals(LPAREN)) { return next; } throw new ParseException( "expected " + EMPTY + " or " + LPAREN + " but found: " + tokenString(stream), stream.lineno()); } /** checks if next token is a closing paren */ private static String nextCloser(StreamTokenizer stream) throws IOException, ParseException { if (nextWord(stream).equals(RPAREN)) { return RPAREN; } throw new ParseException( "expected " + RPAREN + " but found: " + tokenString(stream), stream.lineno()); } /** expects a comma as next token */ private static String nextComma(StreamTokenizer stream) throws IOException, ParseException { if (nextWord(stream).equals(COMMA)) { return COMMA; } throw new ParseException( "expected " + COMMA + " but found: " + tokenString(stream), stream.lineno()); } /** expects an open RPAREN as the next toke */ private static String nextOpener(StreamTokenizer stream) throws IOException, ParseException { if (nextWord(stream).equals(LPAREN)) { return LPAREN; } throw new ParseException( "expected " + LPAREN + " but found: " + tokenString(stream), stream.lineno()); } /** expects either a closing LPAREN or comma as the next token */ private static String nextCloserOrComma(StreamTokenizer stream) throws IOException, ParseException { String token = nextWord(stream); if (token.equals(COMMA) || token.equals(RPAREN)) { return token; } throw new ParseException( "expected " + COMMA + " or " + RPAREN + " but found: " + tokenString(stream), stream.lineno()); } /** next word in the stream */ private static void checkEOF(StreamTokenizer stream) throws ParseException, IOException { if (stream.nextToken() != StreamTokenizer.TT_EOF) { throw new ParseException( "expected end of WKT string but found additional text: " + tokenString(stream), stream.lineno()); } } /** Enumerated type for Shapes */ public enum ShapeType { POINT("point"), MULTIPOINT("multipoint"), LINESTRING("linestring"), MULTILINESTRING("multilinestring"), POLYGON("polygon"), MULTIPOLYGON("multipolygon"), GEOMETRYCOLLECTION("geometrycollection"), ENVELOPE("envelope"); // not part of the actual WKB spec private final String shapeName; private static final Map shapeTypeMap; private static final String BBOX = "BBOX"; static { Map shapeTypes = new HashMap<>(); for (ShapeType type : values()) { shapeTypes.put(type.shapeName, type); } shapeTypes.put(ENVELOPE.wktName().toLowerCase(Locale.ROOT), ENVELOPE); shapeTypeMap = Collections.unmodifiableMap(shapeTypes); } ShapeType(String shapeName) { this.shapeName = shapeName; } String typename() { return shapeName; } /** wkt shape name */ public String wktName() { return this == ENVELOPE ? BBOX : this.shapeName; } public static ShapeType forName(String shapename) { String typename = shapename.toLowerCase(Locale.ROOT); ShapeType type = shapeTypeMap.get(typename); if (type != null) { return type; } throw new IllegalArgumentException("unknown geo_shape [" + shapename + "]"); } } }