
org.memeticlabs.spark.rdd.trycatch.TryCatchRDDFunctions.scala

Error trapping and handling functionality for Spark's RDD API
/**
 * Copyright 2017 Tristan Nixon
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Created by Tristan Nixon on 6/28/17.
 */

package org.memeticlabs.spark.rdd.trycatch

import scala.Function._
import scala.language.implicitConversions
import scala.reflect.ClassTag

import org.apache.spark.{HashPartitioner, Partitioner}
import org.apache.spark.Partitioner._
import org.apache.spark.rdd.RDD

import org.memeticlabs.spark.rdd.trycatch.TryCatchHelpers._

/**
 * RDD functions with try-catch error handling
 */
private[memeticlabs] class TryCatchRDDFunctions[T]( rdd: RDD[T] )(implicit tt: ClassTag[T])
  extends Serializable
{
  /** Apply f within a try-catch wrapper, keeping only the successful results */
  private def trySuccess[U: ClassTag]( f: T => U, errorHandler: errorHandlerFn[T] ): RDD[TryCatchResultWithInput[T, U]] =
    rdd.map( tryCatchResult( f, errorHandler ) ).filter( _.isSuccess )
  /** Transformations (return a new RDD) */

  /**
   * map with error-handling
   */
  def tryMap[U: ClassTag]( f: T => U, errorHandler: errorHandlerFn[T] ): RDD[U] =
    trySuccess( f, errorHandler ).map( _.getResult )

  /**
   * flat-map with error-handling
   */
  def tryFlatMap[U: ClassTag]( f: T => TraversableOnce[U], errorHandler: errorHandlerFn[T] ): RDD[U] =
    trySuccess( f, errorHandler ).flatMap( _.getResult )

  /**
   * filter with error-handling
   */
  def tryFilter( f: ( T ) => Boolean, errorHandler: errorHandlerFn[T] ): RDD[T] =
    trySuccess( f, errorHandler ).filter( _.getResult ).map( _.getInput )
  /**
   * key-by with error-handling
   */
  def tryKeyBy[K]( f: T => K, errorHandler: errorHandlerFn[T] )(implicit kt: ClassTag[K]): RDD[(K, T)] =
    trySuccess( f, errorHandler ).keyBy( _.getResult ).mapValues( _.getInput )

  /**
   * group-by with error-handling, using the default partitioner
   */
  def tryGroupBy[K]( f: T => K, errorHandler: errorHandlerFn[T] )
                   (implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] =
    tryGroupBy( f, defaultPartitioner(rdd), errorHandler )

  /**
   * group-by with error-handling, into a given number of hash partitions
   */
  def tryGroupBy[K]( f: T => K, numPartitions: Int, errorHandler: errorHandlerFn[T] )
                   (implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] =
    tryGroupBy( f, new HashPartitioner(numPartitions), errorHandler )

  /**
   * group-by with error-handling, using the given partitioner
   */
  def tryGroupBy[K]( f: T => K, p: Partitioner, errorHandler: errorHandlerFn[T] )
                   (implicit kt: ClassTag[K], ord: Ordering[K] = null): RDD[(K, Iterable[T])] =
    trySuccess( f, errorHandler ).map( tr => ( tr.getResult, tr.getInput ) ).groupByKey(p)
  /**
   * map-partitions with error-handling
   */
  def tryMapPartitions[U: ClassTag](
      f: Iterator[T] => Iterator[U],
      errorHandler: errorHandlerFn[Iterator[T]],
      preservesPartitioning: Boolean = false ): RDD[U] =
    rdd.mapPartitions( tryCatchAndHandle( f, Iterator[U](), errorHandler ), preservesPartitioning )

  /**
   * map-partitions-with-index with error-handling
   */
  def tryMapPartitionsWithIndex[U: ClassTag](
      f: (Int, Iterator[T]) => Iterator[U],
      errorHandler: errorHandlerFn[(Int, Iterator[T])],
      preservesPartitioning: Boolean = false ): RDD[U] =
    rdd.mapPartitionsWithIndex[U]( untupled( tryCatchAndHandle( f.tupled, Iterator[U](), errorHandler ) ),
      preservesPartitioning )

  /**
   * zip-partitions with error-handling, over two RDDs
   */
  def tryZipPartitions[B: ClassTag, V: ClassTag]( rdd2: RDD[B], preservesPartitioning: Boolean )
    ( f: (Iterator[T], Iterator[B]) => Iterator[V],
      errorHandler: errorHandlerFn[(Iterator[T], Iterator[B])] ): RDD[V] =
    rdd.zipPartitions( rdd2, preservesPartitioning )(
      untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) ) )

  /**
   * zip-partitions with error-handling, over three RDDs
   */
  def tryZipPartitions[B: ClassTag, C: ClassTag, V: ClassTag]( rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean )
    ( f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V],
      errorHandler: errorHandlerFn[(Iterator[T], Iterator[B], Iterator[C])] ): RDD[V] =
    rdd.zipPartitions( rdd2, rdd3, preservesPartitioning )(
      untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) ) )

  /**
   * zip-partitions with error-handling, over four RDDs
   */
  def tryZipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag](
      rdd2: RDD[B],
      rdd3: RDD[C],
      rdd4: RDD[D],
      preservesPartitioning: Boolean )
    ( f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
      errorHandler: errorHandlerFn[(Iterator[T], Iterator[B], Iterator[C], Iterator[D])] ): RDD[V] =
    rdd.zipPartitions( rdd2, rdd3, rdd4, preservesPartitioning )(
      untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) ) )
  /** Actions */

  /** foreach with error-handling */
  def tryForeach( f: T => Unit, errorHandler: errorHandlerFn[T] ): Unit =
    rdd.foreach( tryCatchAndHandle( f, errorHandler ) )

  /** foreach-partition with error-handling */
  def tryForeachPartition( f: Iterator[T] => Unit, errorHandler: errorHandlerFn[Iterator[T]] ): Unit =
    rdd.foreachPartition( tryCatchAndHandle( f, errorHandler ) )

  /** reduce with error-handling */
  def tryReduce(zeroValue: T)( f: (T, T) => T, errorHandler: errorHandlerFn[(T, T)] ): T =
    rdd.reduce( untupled( tryCatchAndHandle( f.tupled, zeroValue, errorHandler ) ) )

  /** tree-reduce with error-handling */
  def tryTreeReduce(zeroValue: T)( f: (T, T) => T, depth: Int = 2, errorHandler: errorHandlerFn[(T, T)] ): T =
    rdd.treeReduce( untupled( tryCatchAndHandle( f.tupled, zeroValue, errorHandler ) ), depth )

  /** fold with error-handling */
  def tryFold(zeroValue: T)( op: (T, T) => T, errorHandler: errorHandlerFn[(T, T)] ): T =
    rdd.fold(zeroValue)( untupled( tryCatchAndHandle( op.tupled, zeroValue, errorHandler ) ) )

  /** aggregate with error-handling */
  def tryAggregate[U: ClassTag](zeroValue: U)(
      seqOp: (U, T) => U,
      seqErrorHandler: errorHandlerFn[(U, T)],
      combOp: (U, U) => U,
      combErrorHandler: errorHandlerFn[(U, U)] ): U =
  {
    val seqFn = untupled( tryCatchAndHandle( seqOp.tupled, zeroValue, seqErrorHandler ) )
    val combFn = untupled( tryCatchAndHandle( combOp.tupled, zeroValue, combErrorHandler ) )
    rdd.aggregate(zeroValue)( seqFn, combFn )
  }

  /** tree-aggregate with error-handling */
  def tryTreeAggregate[U: ClassTag](zeroValue: U)(
      seqOp: (U, T) => U,
      seqErrorHandler: errorHandlerFn[(U, T)],
      combOp: (U, U) => U,
      combErrorHandler: errorHandlerFn[(U, U)],
      depth: Int = 2 ): U =
  {
    val seqFn = untupled( tryCatchAndHandle( seqOp.tupled, zeroValue, seqErrorHandler ) )
    val combFn = untupled( tryCatchAndHandle( combOp.tupled, zeroValue, combErrorHandler ) )
    rdd.treeAggregate(zeroValue)( seqFn, combFn, depth )
  }
}

/**
 * Implicit conversion from RDD to TryCatchRDDFunctions
 */
object TryCatchRDDFunctions
{
  implicit def rddToTryCatchRDDFunctions[T]( rdd: RDD[T] )(implicit tt: ClassTag[T]): TryCatchRDDFunctions[T] =
    new TryCatchRDDFunctions[T]( rdd )
}
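
Below is a minimal usage sketch, not part of the source above. It assumes a local SparkContext, and it assumes that errorHandlerFn[T] (defined in TryCatchHelpers, not shown here) is a handler invoked with the failing input and the thrown exception, roughly (T, Throwable) => Unit; check TryCatchHelpers for the exact alias. The example object, its package, and the handler lambda are illustrative only; the package is placed under org.memeticlabs so the package-private TryCatchRDDFunctions class is accessible.

package org.memeticlabs.spark.rdd.trycatch.examples

import org.apache.spark.{SparkConf, SparkContext}
import org.memeticlabs.spark.rdd.trycatch.TryCatchRDDFunctions._

object TryMapExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext( new SparkConf().setAppName("try-map-example").setMaster("local[*]") )

    val raw = sc.parallelize( Seq("1", "2", "not-a-number", "4") )

    // Parse each record; a record that throws is handed to the error handler
    // and dropped from the output instead of failing the whole job.
    // NOTE: the handler signature below is an assumption about errorHandlerFn.
    val parsed = raw.tryMap(
      (s: String) => s.toInt,
      (input: String, ex: Throwable) => println( s"could not parse '$input': ${ex.getMessage}" )
    )

    parsed.collect().foreach( println ) // expected to print 1, 2, 4

    sc.stop()
  }
}

This mirrors what tryMap does internally: trySuccess wraps the function in a try-catch, reports failures to the handler, and filters the RDD down to the successful results before extracting them.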