
geotrellis.spark.io.slippy.FileSlippyTileReader.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of geotrellis-spark_2.10 Show documentation
Show all versions of geotrellis-spark_2.10 Show documentation
GeoTrellis is an open source geographic data processing engine for high performance applications.
The newest version!
package geotrellis.spark.io.slippy
import geotrellis.vector._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.spark._
import geotrellis.spark.io.hadoop._
import geotrellis.spark.io.hadoop.formats._
import geotrellis.util.Filesystem
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter._
import org.apache.spark._
import org.apache.spark.rdd._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}
import org.apache.hadoop.fs.Path
import java.io._
import scala.collection.JavaConversions._
/**
 * Reads slippy-map tiles stored on the local filesystem under
 * `<uri>/<zoom>/<col>/<row>[.<extension>]`.
 *
 * @param uri        root directory of the tile tree
 * @param extensions file-name suffixes to accept; when empty, every file is considered
 * @param fromBytes  decodes the raw bytes of a single tile file into a `T`
 * @tparam T         type produced for each tile
 */
class FileSlippyTileReader[T](uri: String, extensions: Seq[String] = Seq())(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {
  import SlippyTileReader.TilePath

  /** Lists the files below `path`; see the `File` overload. */
  private def listFiles(path: String): Seq[File] =
    listFiles(new File(path))

  /**
   * Lists the files below `file`, descending into every sub-directory, keeping
   * only names that end with one of `extensions` (all files when `extensions` is empty).
   */
  private def listFiles(file: File): Seq[File] = {
    val fileFilter: IOFileFilter =
      if (extensions.isEmpty) TrueFileFilter.INSTANCE
      else new SuffixFileFilter(extensions)

    FileUtils.listFiles(file, fileFilter, TrueFileFilter.INSTANCE).toSeq
  }

  /**
   * Reads the single tile stored for `key` at the given zoom level.
   *
   * @param zoom zoom level (first path component below `uri`)
   * @param key  tile position; `key.col` selects the directory, `key.row` the file
   * @return     the tile decoded by `fromBytes`
   * @throws FileNotFoundException    if no file for the row exists in the column directory
   * @throws IllegalArgumentException if more than one file matches the row
   */
  def read(zoom: Int, key: SpatialKey): T = {
    val dir = new File(uri, s"$zoom/${key.col}/")
    val row = key.row.toString

    // Match the complete row component of the file name. A bare prefix test
    // would let row 1 also pick up "10.png", "11.png", "123.png", ... and either
    // report a spurious ambiguity or return the wrong tile.
    val candidates = listFiles(dir).filter { f =>
      val name = f.getName
      name == row || name.startsWith(row + ".")
    }

    candidates match {
      case Seq() => throw new FileNotFoundException(s"${dir}/${key.row}*")
      case Seq(tilePath) => fromBytes(key, Filesystem.slurp(tilePath.getAbsolutePath))
      case _ => throw new IllegalArgumentException(s"More than one file matches path ${dir}/${key.row}*")
    }
  }

  /**
   * Reads every tile found below `<uri>/<zoom>` into an RDD keyed by tile position.
   *
   * Files whose absolute path is not recognised by `SlippyTileReader.TilePath`
   * are skipped.
   *
   * @param zoom zoom level to load
   * @param sc   Spark context used to distribute the file list
   * @return     one `(SpatialKey, T)` pair per recognised tile file
   */
  def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = {
    val paths =
      listFiles(new File(uri, zoom.toString).getPath)
        .map(_.getAbsolutePath)
        .collect { case path @ TilePath(x, y) => (SpatialKey(x.toInt, y.toInt), path) }

    // Refer to the decoder through a local value so the closure below does not
    // go through `this` (presumably to keep the reader itself out of the
    // serialized Spark task).
    val lFromBytes = fromBytes

    // Aim for roughly ten files per partition with a floor of 50 partitions,
    // but never more partitions than there are files.
    val numPartitions = math.min(paths.size, math.max(paths.size / 10, 50))

    sc.parallelize(paths)
      .partitionBy(new HashPartitioner(numPartitions))
      .mapPartitions({ partition =>
        partition.map { case (key, path) => (key, lFromBytes(key, Filesystem.slurp(path))) }
      }, preservesPartitioning = true)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy