
geotrellis.spark.io.hadoop.formats.BinaryFileInputFormat.scala
GeoTrellis is an open source geographic data processing engine for high performance applications.
package geotrellis.spark.io.hadoop.formats

import geotrellis.spark.io.hadoop._
import org.apache.hadoop.fs._
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input._

/** A RecordReader that reads an entire file's bytes in one go and turns them
  * into a single (K, V) record via the supplied `read` function. */
class BinaryFileRecordReader[K, V](read: Array[Byte] => (K, V)) extends RecordReader[K, V] {
  private var tup: (K, V) = null
  private var hasNext: Boolean = true

  def initialize(split: InputSplit, context: TaskAttemptContext) = {
    val path = split.asInstanceOf[FileSplit].getPath()
    val conf = context.getConfiguration()
    val bytes = HdfsUtils.readBytes(path, conf)
    tup = read(bytes)
  }

  def close = {}
  def getCurrentKey = tup._1
  // Handing out the value marks the single record as consumed, so the next
  // call to nextKeyValue reports that the reader is exhausted.
  def getCurrentValue = { hasNext = false; tup._2 }
  // The whole file is read during initialize, so progress is always complete.
  def getProgress = 1
  def nextKeyValue = hasNext
}

/** An InputFormat that treats each file as a single, unsplittable record.
  * Subclasses supply `read` to decode a file's bytes into a (K, V) pair. */
trait BinaryFileInputFormat[K, V] extends FileInputFormat[K, V] {
  def read(bytes: Array[Byte], context: TaskAttemptContext): (K, V)

  // Files are read whole, so they must never be split across tasks.
  override def isSplitable(context: JobContext, fileName: Path) = false

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[K, V] =
    new BinaryFileRecordReader({ bytes => read(bytes, context) })
}
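
For context, below is a minimal usage sketch of how a concrete subclass of BinaryFileInputFormat might be defined and wired into Spark. It is not part of the original file: the BytesFileInputFormat class, the Long/Array[Byte] key-value choice, and the input path are illustrative assumptions, not GeoTrellis API.

// Minimal sketch, not part of the original source. Names and paths below are hypothetical.
import geotrellis.spark.io.hadoop.formats.BinaryFileInputFormat
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.SparkContext

// Hypothetical concrete format: emit each whole file as one record, keyed by its byte length.
class BytesFileInputFormat extends BinaryFileInputFormat[Long, Array[Byte]] {
  def read(bytes: Array[Byte], context: TaskAttemptContext): (Long, Array[Byte]) =
    (bytes.length.toLong, bytes)
}

object BytesFileInputFormatExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[*]", "binary-file-example")
    // newAPIHadoopFile pairs the custom InputFormat with its key and value classes;
    // each element of the resulting RDD is one whole file.
    val rdd = sc.newAPIHadoopFile(
      "hdfs:///data/tiles",            // hypothetical input directory
      classOf[BytesFileInputFormat],
      classOf[Long],
      classOf[Array[Byte]]
    )
    println(s"files read: ${rdd.count()}")
    sc.stop()
  }
}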