// edu.ucr.cs.bdlab.beast.indexing.RTreeFeatureReader Maven / Gradle / Ivy
/*
* Copyright 2018 University of California, Riverside
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.ucr.cs.bdlab.beast.indexing;
import edu.ucr.cs.bdlab.beast.io.FeatureReader;
import edu.ucr.cs.bdlab.beast.io.SpatialInputFormat;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.beast.geolite.Feature;
import edu.ucr.cs.bdlab.beast.geolite.GeometryReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.spark.beast.CRSServer;
import org.geotools.referencing.CRS;
import org.opengis.referencing.FactoryException;
import org.opengis.referencing.crs.CoordinateReferenceSystem;
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
/**
 * Reads features from an R-tree-indexed file.
 * <p>
 * The split begins with a feature header followed by the WKT of the coordinate reference system
 * (an empty string if there is none). The R-trees that follow are consumed from the end of the
 * split backwards until the position right after the header is reached. If
 * {@link SpatialInputFormat#FilterMBR} is set in the configuration, only the records returned by
 * the R-tree search on that rectangle are produced; otherwise all records are produced.
 */
@FeatureReader.Metadata(
    description = "An R-tree locally indexed file for efficient range retrieval",
    shortName = "rtree",
    extension = ".rtree",
    noSplit = true
)
public class RTreeFeatureReader extends FeatureReader {
  /**A mutable key for all records*/
  protected EnvelopeND key;

  /**The value that is returned*/
  protected Feature value;

  /**Hadoop environment configuration*/
  private Configuration conf;

  /**Iterates over the records of the R-tree that is currently being read*/
  protected Iterator<? extends Feature> results;

  /**The file name to report in error messages*/
  private String filename;

  /**The geometry reader to read geometries from the R-tree. Configured to use the right SRID.*/
  private GeometryReader reader;

  /**The input to the file*/
  private FSDataInputStream in;

  /**The start position of the current tree*/
  private long posCurrentTree;

  /**The position of the start tree*/
  private long posFirstTree;

  /**The deserializer reads records from the R-tree*/
  private RTreeGuttman.Deserializer<Feature> featureDeserializer;

  /**If the input should be filtered, these are the search coordinates*/
  private double[] minCoord;
  private double[] maxCoord;

  /**
   * Opens the file of the given split, reads the feature header and the CRS, parses the optional
   * filter rectangle, and positions the reader on the last R-tree of the split.
   * @param split the split to read; must be a {@link FileSplit}
   * @param context the task context that provides the Hadoop configuration
   * @throws IOException if the file cannot be opened or read
   */
  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.conf = context.getConfiguration();
    FileSplit fsplit = (FileSplit) split;
    // Open the input file and read the header of the stored features
    filename = fsplit.getPath().toString();
    FileSystem fs = fsplit.getPath().getFileSystem(conf);
    in = fs.open(fsplit.getPath());
    in.seek(fsplit.getStart());
    value = new Feature();
    value.readHeader(in);
    // The header is followed by the CRS in WKT format; an empty string maps to SRID 0
    String wkt = in.readUTF();
    int srid;
    if (wkt.isEmpty()) {
      srid = 0;
    } else {
      try {
        CoordinateReferenceSystem crs = CRS.parseWKT(wkt);
        srid = CRSServer.crsToSRID(crs, CRSServer.sparkConfFromHadoopConf(conf));
      } catch (FactoryException e) {
        // The WKT could not be converted to a CRS; fall back to SRID 4326
        srid = 4326;
      }
    }
    reader = GeometryReader.getGeometryReader(srid);
    // The current position is where the reading should stop (starting from the end)
    posFirstTree = in.getPos();
    posCurrentTree = fsplit.getStart() + fsplit.getLength();
    // Now, either read the entire file, or filter based on the MBR
    String filterMBRStr = conf.get(SpatialInputFormat.FilterMBR);
    if (filterMBRStr != null) {
      // Comma-separated list: all minimum coordinates first, then all maximum coordinates
      String[] parts = filterMBRStr.split(",");
      assert parts.length % 2 == 0 :
          "The filter MBR must have an even number of coordinates: " + filterMBRStr;
      int numDimensions = parts.length / 2;
      minCoord = new double[numDimensions];
      maxCoord = new double[numDimensions];
      for (int d = 0; d < numDimensions; d++) {
        minCoord[d] = Double.parseDouble(parts[d]);
        maxCoord[d] = Double.parseDouble(parts[numDimensions + d]);
      }
    }
    // Create the deserializer of geometries. It reads the field "value" at call time, so the
    // attribute metadata is copied from whichever feature is current at that moment.
    featureDeserializer = input -> {
      try {
        Feature newvalue = new Feature();
        newvalue.copyAttributeMetadata(value);
        newvalue.readValue(input, reader);
        return newvalue;
      } catch (Exception e) {
        throw new RuntimeException("Error reading feature from file "+filename, e);
      }
    };
    readPreviousRTree();
  }

  /**
   * Read the previous R-tree. The file is read from the end to the beginning.
   * On return, {@link #posCurrentTree} points to the start of the tree that was just opened and
   * {@link #results} iterates over its (possibly filtered) records.
   * @throws IOException if seeking or reading the file fails
   */
  private void readPreviousRTree() throws IOException {
    assert posCurrentTree > posFirstTree :
        String.format("Cannot seek before tree at position %d while the start is at %d", posCurrentTree, posFirstTree);
    // The four bytes that precede the current position hold an integer length.
    // Adding those four bytes gives the distance to move back to reach the start of the tree.
    in.seek(posCurrentTree - 4);
    int treeLength = in.readInt() + 4;
    posCurrentTree -= treeLength;
    in.seek(posCurrentTree);
    if (minCoord != null) {
      // Search using the given rectangle
      results = RTreeGuttman.search(in, treeLength, minCoord, maxCoord, featureDeserializer).iterator();
    } else {
      // Read all records
      results = RTreeGuttman.readAll(in, treeLength, featureDeserializer).iterator();
    }
  }

  /**
   * Advances to the next record, moving to preceding R-trees when the current one is exhausted.
   * @return {@code true} if a record was read, {@code false} when no records are left
   * @throws RuntimeException if reading a preceding R-tree fails with an {@link IOException}
   */
  @Override
  public boolean nextKeyValue() {
    // Skip over exhausted (or empty) trees until one has a record or none are left
    while (!results.hasNext()) {
      if (posCurrentTree <= posFirstTree)
        return false;
      try {
        readPreviousRTree();
      } catch (IOException e) {
        throw new RuntimeException("Error reading R-tree", e);
      }
    }
    value = results.next();
    // Reuse the same key object and set it to the envelope of the current geometry
    if (key == null)
      key = new EnvelopeND(reader.getGeometryFactory());
    else
      key.setEmpty();
    key.merge(value.getGeometry());
    return true;
  }

  /**
   * @return the envelope of the geometry of the current record, or {@code null} if no record
   *   has been read yet
   */
  @Override
  public EnvelopeND getCurrentKey() {
    return key;
  }

  /**
   * @return the feature that was most recently read
   */
  @Override
  public Feature getCurrentValue() {
    return value;
  }

  /**
   * @return the progress reported by the underlying disk search iterator, or the constant
   *   {@code 0.1f} if the current iterator is of any other type
   */
  @Override
  public float getProgress() throws IOException {
    return results instanceof RTreeGuttman.DiskSearchIterator?
        ((RTreeGuttman.DiskSearchIterator) results).getProgress() : 0.1f;
  }

  /**
   * Closes the current result iterator, if it is closeable, and the underlying input stream.
   * Safe to call when {@link #initialize(InputSplit, TaskAttemptContext)} did not complete.
   * @throws IOException if closing the iterator or the stream fails
   */
  @Override
  public void close() throws IOException {
    try {
      if (results instanceof Closeable)
        ((Closeable) results).close();
    } finally {
      // This reader opened the stream, so release it even if the iterator did not
      if (in != null)
        in.close();
    }
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy