
vectorpipe.sources.AugmentedDiffMicroBatchReader.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vectorpipe_2.11 Show documentation
Show all versions of vectorpipe_2.11 Show documentation
Import OSM data and output to VectorTiles with GeoTrellis.
The newest version!
package vectorpipe.sources
import java.net.URI
import java.util
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.sources.v2.DataSourceOptions
import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader}
import vectorpipe.model.AugmentedDiff
import scala.collection.JavaConverters._
import scala.compat.java8.OptionConverters._
/**
 * An [[InputPartition]] describing a batch of augmented diff sequences to read.
 *
 * The partition itself does no work; it only carries the values needed to build
 * an [[AugmentedDiffStreamBatchReader]] when a reader is requested.
 *
 * @param baseURI   URI handed unchanged to the partition reader
 * @param sequences sequence numbers handed unchanged to the partition reader
 * @param handler   callback handed unchanged to the partition reader
 */
case class AugmentedDiffStreamBatchTask(
  baseURI: URI,
  sequences: Seq[Int],
  handler: (Int, AugmentedDiffSource.RF) => Unit
) extends InputPartition[InternalRow] {

  /** Builds the reader for this partition from the partition's own fields. */
  override def createPartitionReader(): InputPartitionReader[InternalRow] = {
    AugmentedDiffStreamBatchReader(
      baseURI = baseURI,
      sequences = sequences,
      handler = handler
    )
  }
}
/**
 * Partition reader for augmented diffs, built on [[ReplicationStreamBatchReader]].
 *
 * @param baseURI   URI forwarded to the parent reader
 * @param sequences sequence numbers forwarded to the parent reader
 * @param handler   callback passed along to `AugmentedDiffSource.getSequence`
 *                  on every sequence fetch
 */
case class AugmentedDiffStreamBatchReader(
  baseURI: URI,
  sequences: Seq[Int],
  handler: (Int, AugmentedDiffSource.RF) => Unit
) extends ReplicationStreamBatchReader[AugmentedDiff](baseURI, sequences) {

  /**
   * Fetches the augmented diffs for a single sequence by delegating to
   * `AugmentedDiffSource.getSequence`.
   *
   * Note: the `baseURI` parameter shadows the constructor field of the same
   * name; the value supplied by the caller is the one used.
   *
   * @param baseURI  URI passed through to `AugmentedDiffSource.getSequence`
   * @param sequence sequence number to fetch
   * @return whatever `AugmentedDiffSource.getSequence` returns for that sequence
   */
  override def getSequence(baseURI: URI, sequence: Int): Seq[AugmentedDiff] = {
    AugmentedDiffSource.getSequence(baseURI, sequence, handler)
  }
}
/**
 * Micro-batch reader for augmented diffs, built on
 * [[ReplicationStreamMicroBatchReader]].
 *
 * Configuration is read from `options`:
 *  - `Source.BaseURI` (required): the URI passed to `AugmentedDiffSource` and to
 *    every planned partition.
 *  - `Source.ErrorHandler` (optional): fully-qualified class name of the
 *    [[AugmentedDiffSourceErrorHandler]] to instantiate; defaults to
 *    `vectorpipe.sources.AugmentedDiffSourceErrorHandler`.
 *
 * @param options            data source options supplied by the caller
 * @param checkpointLocation forwarded unchanged to the parent reader
 */
case class AugmentedDiffMicroBatchReader(options: DataSourceOptions, checkpointLocation: String)
    extends ReplicationStreamMicroBatchReader[AugmentedDiff](options, checkpointLocation)
    with Logging {

  /** Delegates to `AugmentedDiffSource.getCurrentSequence` for the configured base URI. */
  override def getCurrentSequence: Option[Int] =
    AugmentedDiffSource.getCurrentSequence(baseURI)

  /**
   * Parses the `Source.BaseURI` option into a [[java.net.URI]].
   *
   * @throws RuntimeException if the option is not set
   */
  private def baseURI: URI =
    options
      .get(Source.BaseURI)
      .asScala
      .map(new URI(_))
      .getOrElse(
        throw new RuntimeException(
          s"${Source.BaseURI} is a required option for ${Source.AugmentedDiffs}"
        )
      )

  /**
   * Reflectively instantiates the configured error handler and passes it the
   * data source options as an immutable Scala map.
   *
   * A new handler instance is created on every call.
   *
   * @throws ClassCastException if the configured class is not an
   *                            [[AugmentedDiffSourceErrorHandler]]
   */
  private def errorHandler: AugmentedDiffSourceErrorHandler = {
    val handlerClassName = options
      .get(Source.ErrorHandler)
      .asScala
      .getOrElse("vectorpipe.sources.AugmentedDiffSourceErrorHandler")

    // `Class.newInstance` is deprecated; go through the no-arg constructor instead.
    // `asSubclass` verifies the type up front, so a misconfigured class is rejected
    // (ClassCastException) before it is ever instantiated and no unchecked cast is needed.
    val handler: AugmentedDiffSourceErrorHandler = Class
      .forName(handlerClassName)
      .asSubclass(classOf[AugmentedDiffSourceErrorHandler])
      .getDeclaredConstructor()
      .newInstance()

    handler.setOptions(options.asMap.asScala.toMap)
    handler
  }

  /**
   * Plans one [[AugmentedDiffStreamBatchTask]] per element of `sequenceRange`
   * (not defined in this class; presumably inherited from the parent reader),
   * each covering exactly that one sequence.
   */
  override def planInputPartitions(): util.List[InputPartition[InternalRow]] = {
    // Resolve the URI and the handler at most once for the whole plan instead of
    // re-parsing / re-instantiating them for every sequence. They are lazy so
    // that an empty range still touches neither, as before.
    lazy val uri = baseURI
    lazy val handler = errorHandler

    sequenceRange
      .map { seq =>
        // Type ascription (a checked upcast) replaces the previous asInstanceOf.
        AugmentedDiffStreamBatchTask(uri, Seq(seq), handler.handle): InputPartition[InternalRow]
      }
      .asJava
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy