
vectorpipe.sources.ChangesetReader.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vectorpipe_2.11 Show documentation
Show all versions of vectorpipe_2.11 Show documentation
Import OSM data and output to VectorTiles with GeoTrellis.
The newest version!
package vectorpipe.sources
import java.net.URI
import java.util
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.sources.v2.DataSourceOptions
import org.apache.spark.sql.sources.v2.reader.InputPartition
import vectorpipe.model.Changeset
import scala.collection.JavaConverters._
import scala.util.Random
/**
 * Replication reader specialised for [[Changeset]] records.
 *
 * Plans one input partition per batch of replication sequence numbers and
 * resolves the replication endpoint from the supplied data source options.
 *
 * @param options data source options; `Source.BaseURI` is read from them to
 *                locate the changeset replication endpoint
 */
case class ChangesetReader(options: DataSourceOptions)
    extends ReplicationReader[Changeset](options) {

  /**
   * Splits the range `startSequence to endSequence` into batches and wraps
   * each batch in a `ChangesetStreamBatchTask`.
   *
   * @return the planned partitions as a Java list
   */
  override def planInputPartitions(): util.List[InputPartition[InternalRow]] = {
    // Shuffle first so that sequential diffs are not assigned to the same task.
    val shuffled = Random.shuffle((startSequence to endSequence).toList)

    // Integer division, clamped so that a batch always holds at least one sequence.
    // NOTE(review): because the division rounds down, the number of batches can
    // exceed partitionCount — confirm whether that is intended.
    val batchSize = Math.max(1, shuffled.length / partitionCount)

    val partitions =
      for (batch <- shuffled.grouped(batchSize).toList)
        yield ChangesetStreamBatchTask(baseURI, batch)
          .asInstanceOf[InputPartition[InternalRow]]

    partitions.asJava
  }

  /**
   * Asks `ChangesetSource` for the current sequence at `baseURI` and narrows
   * its `sequence` value to an `Int`.
   */
  override protected def getCurrentSequence: Option[Int] =
    for (current <- ChangesetSource.getCurrentSequence(baseURI))
      yield current.sequence.toInt

  /**
   * Replication endpoint: the `Source.BaseURI` option when present, otherwise
   * the planet.osm.org changeset replication URL.
   */
  private def baseURI: URI = {
    val location = options
      .get(Source.BaseURI)
      .orElse("https://planet.osm.org/replication/changesets/")

    new URI(location)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy