
org.memeticlabs.spark.rdd.trycatch.TryCatchPairRDDFunctions.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-rdd-trycatch Show documentation
Error trapping and handling functionality for Spark's RDD API
The newest version!
/**
* Copyright 2017 Tristan Nixon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Created by Tristan Nixon on 7/6/17.
*/
package org.memeticlabs.spark.rdd.trycatch
import scala.collection.Map
import scala.language.implicitConversions
import scala.reflect.ClassTag
import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.Serializer
import org.memeticlabs.spark.rdd.trycatch.TryCatchHelpers._
/**
  * Error-trapping counterparts of Spark's pair-RDD operations.
  *
  * Each `try*` method passes the caller's function(s) through a helper imported
  * from `TryCatchHelpers` (`tryCatchAndHandle` or `tryCatchResult`) together
  * with the supplied `errorHandler`, and then delegates to the corresponding
  * operation on the wrapped `rdd`.
  *
  * NOTE(review): the helpers are defined outside this file. The `zeroValue`
  * handed to `tryCatchAndHandle` is presumably what the guarded function yields
  * when the wrapped function throws -- confirm against `TryCatchHelpers`.
  *
  * NOTE(review): the `tryCombineByKey*` and `tryReduceByKey` overloads share an
  * identical first parameter list and differ only in the second one. Scala 2
  * resolves overloads from the first argument list, so call sites may be
  * reported as ambiguous -- verify before relying on these overloads.
  *
  * @param rdd the pair RDD every operation delegates to
  * @param kt  class tag of the key type
  * @param vt  class tag of the value type
  * @param ord optional key ordering; defaults to `null`
  * @tparam K key type
  * @tparam V value type
  */
private[memeticlabs] class TryCatchPairRDDFunctions[K, V](rdd: RDD[(K, V)])
                                                         (implicit kt: ClassTag[K],
                                                          vt: ClassTag[V],
                                                          ord: Ordering[K] = null)
  extends Serializable {

  /**
    * Guards the three combiner functions and calls
    * `rdd.combineByKeyWithClassTag` with an explicit partitioner, map-side
    * combine flag and serializer, passing `ct` explicitly.
    */
  def tryCombineByKeyWithClassTag[C](zeroValue: C)
                                    (createCombiner: V => C,
                                     mergeValue: (C, V) => C,
                                     mergeCombiners: (C, C) => C,
                                     partitioner: Partitioner,
                                     mapSideCombine: Boolean = true,
                                     serializer: Serializer = null,
                                     errorHandler: errorHandlerFn[Any])
                                    (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKeyWithClassTag(
      guardedCreate, guardedMergeValue, guardedMergeCombiners,
      partitioner, mapSideCombine, serializer)(ct)
  }

  /**
    * Guards the three combiner functions and calls `rdd.combineByKey` with an
    * explicit partitioner, map-side combine flag and serializer.
    */
  def tryCombineByKey[C](zeroValue: C)
                        (createCombiner: V => C,
                         mergeValue: (C, V) => C,
                         mergeCombiners: (C, C) => C,
                         partitioner: Partitioner,
                         mapSideCombine: Boolean = true,
                         serializer: Serializer = null,
                         errorHandler: errorHandlerFn[Any])
                        (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKey(
      guardedCreate, guardedMergeValue, guardedMergeCombiners,
      partitioner, mapSideCombine, serializer)
  }

  /**
    * Guards the three combiner functions and calls
    * `rdd.combineByKeyWithClassTag` with a partition count, passing `ct`
    * explicitly.
    */
  def tryCombineByKeyWithClassTag[C](zeroValue: C)
                                    (createCombiner: V => C,
                                     mergeValue: (C, V) => C,
                                     mergeCombiners: (C, C) => C,
                                     numPartitions: Int,
                                     errorHandler: errorHandlerFn[Any])
                                    (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKeyWithClassTag(
      guardedCreate, guardedMergeValue, guardedMergeCombiners, numPartitions)(ct)
  }

  /**
    * Guards the three combiner functions and calls `rdd.combineByKey` with a
    * partition count.
    */
  def tryCombineByKey[C](zeroValue: C)
                        (createCombiner: V => C,
                         mergeValue: (C, V) => C,
                         mergeCombiners: (C, C) => C,
                         numPartitions: Int,
                         errorHandler: errorHandlerFn[Any])
                        (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKey(
      guardedCreate, guardedMergeValue, guardedMergeCombiners, numPartitions)
  }

  /**
    * Guards the three combiner functions and calls the three-argument form of
    * `rdd.combineByKeyWithClassTag`, passing `ct` explicitly.
    */
  def tryCombineByKeyWithClassTag[C](zeroValue: C)
                                    (createCombiner: V => C,
                                     mergeValue: (C, V) => C,
                                     mergeCombiners: (C, C) => C,
                                     errorHandler: errorHandlerFn[Any])
                                    (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKeyWithClassTag(
      guardedCreate, guardedMergeValue, guardedMergeCombiners)(ct)
  }

  /**
    * Guards the three combiner functions and calls the three-argument form of
    * `rdd.combineByKey`.
    */
  def tryCombineByKey[C](zeroValue: C)
                        (createCombiner: V => C,
                         mergeValue: (C, V) => C,
                         mergeCombiners: (C, C) => C,
                         errorHandler: errorHandlerFn[Any])
                        (implicit ct: ClassTag[C]): RDD[(K, C)] = {
    val guardedCreate: V => C =
      tryCatchAndHandle[V, C](createCombiner, zeroValue, errorHandler)
    val guardedMergeValue: (C, V) => C =
      tryCatchAndHandle[C, V](mergeValue, zeroValue, errorHandler)
    val guardedMergeCombiners: (C, C) => C =
      tryCatchAndHandle[C](mergeCombiners, zeroValue, errorHandler)

    rdd.combineByKey(guardedCreate, guardedMergeValue, guardedMergeCombiners)
  }

  /**
    * Guards `seqOp` and `combOp` and calls `rdd.aggregateByKey` with
    * `zeroValue` and an explicit partitioner.
    */
  def tryAggregateByKey[U: ClassTag](zeroValue: U,
                                     partitioner: Partitioner)
                                    (seqOp: (U, V) => U,
                                     combOp: (U, U) => U,
                                     errorHandler: errorHandlerFn[Any]): RDD[(K, U)] = {
    val guardedSeqOp: (U, V) => U =
      tryCatchAndHandle[U, V](seqOp, zeroValue, errorHandler)
    val guardedCombOp: (U, U) => U =
      tryCatchAndHandle[U](combOp, zeroValue, errorHandler)

    rdd.aggregateByKey(zeroValue, partitioner)(guardedSeqOp, guardedCombOp)
  }

  /**
    * Guards `seqOp` and `combOp` and calls `rdd.aggregateByKey` with
    * `zeroValue` and a partition count.
    */
  def tryAggregateByKey[U: ClassTag](zeroValue: U,
                                     numPartitions: Int)
                                    (seqOp: (U, V) => U,
                                     combOp: (U, U) => U,
                                     errorHandler: errorHandlerFn[Any]): RDD[(K, U)] = {
    val guardedSeqOp: (U, V) => U =
      tryCatchAndHandle[U, V](seqOp, zeroValue, errorHandler)
    val guardedCombOp: (U, U) => U =
      tryCatchAndHandle[U](combOp, zeroValue, errorHandler)

    rdd.aggregateByKey(zeroValue, numPartitions)(guardedSeqOp, guardedCombOp)
  }

  /**
    * Guards `seqOp` and `combOp` and calls `rdd.aggregateByKey` with only
    * `zeroValue`.
    */
  def tryAggregateByKey[U: ClassTag](zeroValue: U)
                                    (seqOp: (U, V) => U,
                                     combOp: (U, U) => U,
                                     errorHandler: errorHandlerFn[Any]): RDD[(K, U)] = {
    val guardedSeqOp: (U, V) => U =
      tryCatchAndHandle[U, V](seqOp, zeroValue, errorHandler)
    val guardedCombOp: (U, U) => U =
      tryCatchAndHandle[U](combOp, zeroValue, errorHandler)

    rdd.aggregateByKey(zeroValue)(guardedSeqOp, guardedCombOp)
  }

  /**
    * Guards `func` and calls `rdd.foldByKey` with `zeroValue` and an explicit
    * partitioner.
    */
  def tryFoldByKey(zeroValue: V, partitioner: Partitioner)
                  (func: (V, V) => V,
                   errorHandler: errorHandlerFn[(V, V)]): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.foldByKey(zeroValue, partitioner)(guardedFunc)
  }

  /**
    * Guards `func` and calls `rdd.foldByKey` with `zeroValue` and a partition
    * count.
    */
  def tryFoldByKey(zeroValue: V, numPartitions: Int)
                  (func: (V, V) => V,
                   errorHandler: errorHandlerFn[(V, V)]): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.foldByKey(zeroValue, numPartitions)(guardedFunc)
  }

  /** Guards `func` and calls `rdd.foldByKey` with only `zeroValue`. */
  def tryFoldByKey(zeroValue: V)
                  (func: (V, V) => V,
                   errorHandler: errorHandlerFn[(V, V)]): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.foldByKey(zeroValue)(guardedFunc)
  }

  /** Guards `func` and calls `rdd.reduceByKey` with an explicit partitioner. */
  def tryReduceByKey(zeroValue: V)
                    (partitioner: Partitioner,
                     func: (V, V) => V,
                     errorHandler: errorHandlerFn[(V, V)]): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.reduceByKey(partitioner, guardedFunc)
  }

  /** Guards `func` and calls `rdd.reduceByKey` with a partition count. */
  def tryReduceByKey(zeroValue: V)
                    (func: (V, V) => V,
                     errorHandler: errorHandlerFn[(V, V)],
                     numPartitions: Int): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.reduceByKey(guardedFunc, numPartitions)
  }

  /** Guards `func` and calls the single-argument form of `rdd.reduceByKey`. */
  def tryReduceByKey(zeroValue: V)
                    (func: (V, V) => V,
                     errorHandler: errorHandlerFn[(V, V)]): RDD[(K, V)] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.reduceByKey(guardedFunc)
  }

  /**
    * Guards `func` and calls `rdd.reduceByKeyLocally`, returning its
    * `scala.collection.Map` result.
    */
  def tryReduceByKeyLocally(zeroValue: V)
                           (func: (V, V) => V,
                            errorHandler: errorHandlerFn[(V, V)]): Map[K, V] = {
    val guardedFunc: (V, V) => V =
      tryCatchAndHandle[V](func, zeroValue, errorHandler)

    rdd.reduceByKeyLocally(guardedFunc)
  }

  /**
    * Maps every value through `f` wrapped by `tryCatchResult`, keeps only the
    * entries whose wrapped result reports `isSuccess`, and unwraps those with
    * `getResult`.
    */
  def tryMapValues[U](f: V => U,
                      errorHandler: errorHandlerFn[V])
                     (implicit ut: ClassTag[U]): RDD[(K, U)] = {
    val attempted = rdd.mapValues(tryCatchResult(f, errorHandler))
    val succeeded = attempted.filter(entry => entry._2.isSuccess)
    succeeded.mapValues(outcome => outcome.getResult)
  }

  /**
    * Maps every value through `f` wrapped by `tryCatchResult`, keeps only the
    * entries whose wrapped result reports `isSuccess`, and flattens the
    * collections obtained from `getResult` into individual values.
    */
  def tryFlatMapValues[U](f: V => TraversableOnce[U],
                          errorHandler: errorHandlerFn[V])
                         (implicit ut: ClassTag[U]): RDD[(K, U)] = {
    val attempted = rdd.mapValues(tryCatchResult(f, errorHandler))
    val succeeded = attempted.filter(entry => entry._2.isSuccess)
    succeeded.flatMapValues(outcome => outcome.getResult)
  }
}
/**
  * Companion object providing the implicit view from a pair RDD to
  * [[TryCatchPairRDDFunctions]].
  */
object TryCatchPairRDDFunctions {

  /**
    * Wraps `pairRDD` in a [[TryCatchPairRDDFunctions]] so the `try*` operations
    * can be called on it directly once this conversion is in scope.
    *
    * The result type is declared explicitly: implicit definitions with an
    * inferred type are only eligible after their definition point within the
    * same file, and Scala 3 rejects them outright.
    *
    * @param pairRDD the pair RDD to wrap
    * @param kt      class tag of the key type
    * @param vt      class tag of the value type
    * @tparam K key type
    * @tparam V value type
    * @return a new wrapper around `pairRDD`
    */
  implicit def pairRDDToTryCatchPairRDDFunctions[K, V](pairRDD: RDD[(K, V)])
                                                     (implicit kt: ClassTag[K],
                                                      vt: ClassTag[V]): TryCatchPairRDDFunctions[K, V] =
    new TryCatchPairRDDFunctions[K, V](pairRDD)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy