
geotrellis.spark.io.file.FileRDDWriter.scala (from the geotrellis-spark_2.11 artifact)
GeoTrellis is an open source geographic data processing engine for high performance applications.
package geotrellis.spark.io.file

import geotrellis.spark.io.avro.{AvroEncoder, AvroRecordCodec}
import geotrellis.spark.io.avro.codecs.KeyValueRecordCodec
import geotrellis.util.Filesystem

import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

import java.io.File

object FileRDDWriter {
  /** Groups the records of `rdd` by their key path, Avro-encodes each group with
    * [[KeyValueRecordCodec]], and writes one file per key path under `rootPath`
    * on the local filesystem. */
  def write[K: AvroRecordCodec: ClassTag, V: AvroRecordCodec: ClassTag](
    rdd: RDD[(K, V)],
    rootPath: String,
    keyPath: K => String
  ): Unit = {
    val codec = KeyValueRecordCodec[K, V]
    val schema = codec.schema

    val pathsToTiles =
      // Call groupBy with numPartitions; if called without that argument or a partitioner,
      // groupBy will reuse the partitioner on the parent RDD if it is set, which could be typed
      // on a key type that may no longer be valid for the key type of the resulting RDD.
      rdd.groupBy({ row => keyPath(row._1) }, numPartitions = rdd.partitions.length)

    Filesystem.ensureDirectory(rootPath)

    pathsToTiles.foreach { case (path, rows) =>
      val bytes = AvroEncoder.toBinary(rows.toVector)(codec)
      Filesystem.writeBytes(path, bytes)
    }
  }
}
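
Below is a minimal usage sketch, not part of the original source. It assumes the implicit AvroRecordCodec instances for SpatialKey and Tile that GeoTrellis provides via geotrellis.spark.io.avro.codecs.Implicits, and it uses a hypothetical keyPath function that maps each SpatialKey to a flat "<col>_<row>" file under rootPath; GeoTrellis' own FileLayerWriter derives key paths from a space-filling-curve index instead.

import geotrellis.raster.Tile
import geotrellis.spark.SpatialKey
import geotrellis.spark.io.avro.codecs.Implicits._
import geotrellis.spark.io.file.FileRDDWriter
import org.apache.spark.rdd.RDD

import java.io.File

object FileRDDWriterExample {
  // Hypothetical key-path function for illustration only: one file per
  // SpatialKey, named "<col>_<row>", directly under rootPath.
  def keyPath(rootPath: String)(key: SpatialKey): String =
    new File(rootPath, s"${key.col}_${key.row}").getAbsolutePath

  def writeLayer(rdd: RDD[(SpatialKey, Tile)], rootPath: String): Unit =
    // The AvroRecordCodec[SpatialKey] and AvroRecordCodec[Tile] instances are
    // assumed to come from geotrellis.spark.io.avro.codecs.Implicits._ above.
    FileRDDWriter.write(rdd, rootPath, keyPath(rootPath))
}

As the source above shows, write creates rootPath if needed (via Filesystem.ensureDirectory) and produces one Avro-encoded file per distinct key path.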