All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.memeticlabs.spark.rdd.trycatch.TryCatchPairRDDFunctions.scala Maven / Gradle / Ivy

The newest version!
/**
	* Copyright 2017 Tristan Nixon
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	* Created by Tristan Nixon on 7/6/17.
	*/
package org.memeticlabs.spark.rdd.trycatch

import scala.collection.Map
import scala.language.implicitConversions
import scala.reflect.ClassTag

import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.Serializer

import org.memeticlabs.spark.rdd.trycatch.TryCatchHelpers._

/**
	* Key/value RDD operations whose user-supplied functions are wrapped by the
	* helpers imported from `TryCatchHelpers` before being handed to Spark.
	*
	* The combine / aggregate / fold / reduce variants pass every function through
	* `tryCatchAndHandle` together with a caller-supplied `zeroValue` and
	* `errorHandler`, then delegate to the Spark operation of the matching name.
	* `tryCatchAndHandle` is defined outside this file; presumably it reports a
	* failure to `errorHandler` and yields `zeroValue` in its place -- TODO confirm
	* against TryCatchHelpers.
	*
	* NOTE(review): the `tryCombineByKeyWithClassTag`, `tryCombineByKey` and
	* `tryReduceByKey` overload families share an identical first parameter list
	* `(zeroValue)`. Scala 2 overload resolution looks only at the first argument
	* list, so calls to these overloads may be rejected as ambiguous -- verify
	* against the compiler version this library targets.
	*
	* @param rdd the key/value RDD whose operations are being wrapped
	* @param kt  class tag of the key type
	* @param vt  class tag of the value type
	* @param ord optional key ordering; `null` when none was supplied
	* @tparam K key type
	* @tparam V value type
	*/
private[memeticlabs] class TryCatchPairRDDFunctions[K, V]( rdd: RDD[(K, V)] )(
	implicit kt: ClassTag[K],
	vt: ClassTag[V],
	ord: Ordering[K] = null
) extends Serializable
{
	/**
	 * Wraps the three combiner functions and delegates to
	 * `rdd.combineByKeyWithClassTag` with an explicit partitioner, map-side
	 * combine flag and serializer.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKeyWithClassTag[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		partitioner: Partitioner,
		mapSideCombine: Boolean = true,
		serializer: Serializer = null,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKeyWithClassTag( guardedCreate, guardedMerge, guardedCombine,
		                              partitioner, mapSideCombine, serializer )( ct )
	}

	/**
	 * Wraps the three combiner functions and delegates to `rdd.combineByKey`
	 * with an explicit partitioner, map-side combine flag and serializer.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKey[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		partitioner: Partitioner,
		mapSideCombine: Boolean = true,
		serializer: Serializer = null,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKey( guardedCreate, guardedMerge, guardedCombine,
		                  partitioner, mapSideCombine, serializer )
	}

	/**
	 * Wraps the three combiner functions and delegates to
	 * `rdd.combineByKeyWithClassTag` with a partition count.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKeyWithClassTag[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		numPartitions: Int,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKeyWithClassTag( guardedCreate, guardedMerge, guardedCombine, numPartitions )( ct )
	}

	/**
	 * Wraps the three combiner functions and delegates to `rdd.combineByKey`
	 * with a partition count.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKey[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		numPartitions: Int,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKey( guardedCreate, guardedMerge, guardedCombine, numPartitions )
	}

	/**
	 * Wraps the three combiner functions and delegates to the three-argument
	 * form of `rdd.combineByKeyWithClassTag`.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKeyWithClassTag[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKeyWithClassTag( guardedCreate, guardedMerge, guardedCombine )( ct )
	}

	/**
	 * Wraps the three combiner functions and delegates to the three-argument
	 * form of `rdd.combineByKey`.
	 *
	 * @param zeroValue    value handed to `tryCatchAndHandle` for every wrapped function
	 * @param errorHandler handler handed to `tryCatchAndHandle` for every wrapped function
	 */
	def tryCombineByKey[C]( zeroValue: C )(
		createCombiner: V => C,
		mergeValue: (C, V) => C,
		mergeCombiners: (C, C) => C,
		errorHandler: errorHandlerFn[Any]
	)( implicit ct: ClassTag[C] ): RDD[(K, C)] =
	{
		val guardedCreate = tryCatchAndHandle[V, C]( createCombiner, zeroValue, errorHandler )
		val guardedMerge = tryCatchAndHandle[C, V]( mergeValue, zeroValue, errorHandler )
		val guardedCombine = tryCatchAndHandle[C]( mergeCombiners, zeroValue, errorHandler )
		rdd.combineByKey( guardedCreate, guardedMerge, guardedCombine )
	}

	/**
	 * Wraps `seqOp` and `combOp` and delegates to `rdd.aggregateByKey` with an
	 * explicit partitioner. `zeroValue` serves both as the aggregation's zero
	 * value and as the value handed to `tryCatchAndHandle`.
	 */
	def tryAggregateByKey[U: ClassTag]( zeroValue: U, partitioner: Partitioner )(
		seqOp: (U, V) => U,
		combOp: (U, U) => U,
		errorHandler: errorHandlerFn[Any]
	): RDD[(K, U)] =
	{
		val guardedSeq = tryCatchAndHandle[U, V]( seqOp, zeroValue, errorHandler )
		val guardedComb = tryCatchAndHandle[U]( combOp, zeroValue, errorHandler )
		rdd.aggregateByKey( zeroValue, partitioner )( guardedSeq, guardedComb )
	}

	/**
	 * Wraps `seqOp` and `combOp` and delegates to `rdd.aggregateByKey` with a
	 * partition count. `zeroValue` serves both as the aggregation's zero value
	 * and as the value handed to `tryCatchAndHandle`.
	 */
	def tryAggregateByKey[U: ClassTag]( zeroValue: U, numPartitions: Int )(
		seqOp: (U, V) => U,
		combOp: (U, U) => U,
		errorHandler: errorHandlerFn[Any]
	): RDD[(K, U)] =
	{
		val guardedSeq = tryCatchAndHandle[U, V]( seqOp, zeroValue, errorHandler )
		val guardedComb = tryCatchAndHandle[U]( combOp, zeroValue, errorHandler )
		rdd.aggregateByKey( zeroValue, numPartitions )( guardedSeq, guardedComb )
	}

	/**
	 * Wraps `seqOp` and `combOp` and delegates to the single-argument form of
	 * `rdd.aggregateByKey`. `zeroValue` serves both as the aggregation's zero
	 * value and as the value handed to `tryCatchAndHandle`.
	 */
	def tryAggregateByKey[U: ClassTag]( zeroValue: U )(
		seqOp: (U, V) => U,
		combOp: (U, U) => U,
		errorHandler: errorHandlerFn[Any]
	): RDD[(K, U)] =
	{
		val guardedSeq = tryCatchAndHandle[U, V]( seqOp, zeroValue, errorHandler )
		val guardedComb = tryCatchAndHandle[U]( combOp, zeroValue, errorHandler )
		rdd.aggregateByKey( zeroValue )( guardedSeq, guardedComb )
	}

	/**
	 * Wraps `func` and delegates to `rdd.foldByKey` with an explicit
	 * partitioner. `zeroValue` serves both as the fold's zero value and as the
	 * value handed to `tryCatchAndHandle`.
	 */
	def tryFoldByKey( zeroValue: V, partitioner: Partitioner )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V,V)]
	): RDD[(K, V)] =
	{
		val guardedFold = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.foldByKey( zeroValue, partitioner )( guardedFold )
	}

	/**
	 * Wraps `func` and delegates to `rdd.foldByKey` with a partition count.
	 * `zeroValue` serves both as the fold's zero value and as the value handed
	 * to `tryCatchAndHandle`.
	 */
	def tryFoldByKey( zeroValue: V, numPartitions: Int )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V,V)]
	): RDD[(K, V)] =
	{
		val guardedFold = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.foldByKey( zeroValue, numPartitions )( guardedFold )
	}

	/**
	 * Wraps `func` and delegates to the single-argument form of
	 * `rdd.foldByKey`. `zeroValue` serves both as the fold's zero value and as
	 * the value handed to `tryCatchAndHandle`.
	 */
	def tryFoldByKey( zeroValue: V )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V,V)]
	): RDD[(K, V)] =
	{
		val guardedFold = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.foldByKey( zeroValue )( guardedFold )
	}

	/**
	 * Wraps `func` and delegates to `rdd.reduceByKey` with an explicit
	 * partitioner.
	 *
	 * @param zeroValue value handed to `tryCatchAndHandle` alongside `func`
	 */
	def tryReduceByKey( zeroValue: V )(
		partitioner: Partitioner,
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V, V)]
	): RDD[(K, V)] =
	{
		val guardedReduce = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.reduceByKey( partitioner, guardedReduce )
	}

	/**
	 * Wraps `func` and delegates to `rdd.reduceByKey` with a partition count.
	 *
	 * @param zeroValue value handed to `tryCatchAndHandle` alongside `func`
	 */
	def tryReduceByKey( zeroValue: V )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V, V)],
		numPartitions: Int
	): RDD[(K, V)] =
	{
		val guardedReduce = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.reduceByKey( guardedReduce, numPartitions )
	}

	/**
	 * Wraps `func` and delegates to the single-argument form of
	 * `rdd.reduceByKey`.
	 *
	 * @param zeroValue value handed to `tryCatchAndHandle` alongside `func`
	 */
	def tryReduceByKey( zeroValue: V )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V, V)]
	): RDD[(K, V)] =
	{
		val guardedReduce = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.reduceByKey( guardedReduce )
	}

	/**
	 * Wraps `func` and delegates to `rdd.reduceByKeyLocally`, returning the
	 * resulting `Map` of keys to reduced values.
	 *
	 * @param zeroValue value handed to `tryCatchAndHandle` alongside `func`
	 */
	def tryReduceByKeyLocally( zeroValue: V )(
		func: (V, V) => V,
		errorHandler: errorHandlerFn[(V, V)]
	): Map[K, V] =
	{
		val guardedReduce = tryCatchAndHandle[V]( func, zeroValue, errorHandler )
		rdd.reduceByKeyLocally( guardedReduce )
	}

	/**
	 * Maps every value through `f` wrapped by `tryCatchResult`, keeps only the
	 * entries whose wrapped result reports `isSuccess`, and unwraps those with
	 * `getResult`. Entries that are not successes are absent from the output.
	 *
	 * @param f            transformation applied to each value
	 * @param errorHandler handler handed to `tryCatchResult`; presumably invoked
	 *                     for values on which `f` fails -- confirm against TryCatchHelpers
	 */
	def tryMapValues[U]( f: V => U, errorHandler: errorHandlerFn[V] )(
		implicit ut: ClassTag[U]
	): RDD[(K, U)] =
	{
		val attempted = rdd.mapValues( tryCatchResult( f, errorHandler ) )
		val succeeded = attempted.filter( entry => entry._2.isSuccess )
		succeeded.mapValues( outcome => outcome.getResult )
	}

	/**
	 * Maps every value through `f` wrapped by `tryCatchResult`, keeps only the
	 * entries whose wrapped result reports `isSuccess`, and flattens the
	 * collections obtained from `getResult` into individual key/value pairs.
	 *
	 * @param f            transformation producing zero or more outputs per value
	 * @param errorHandler handler handed to `tryCatchResult`; presumably invoked
	 *                     for values on which `f` fails -- confirm against TryCatchHelpers
	 */
	def tryFlatMapValues[U]( f: V => TraversableOnce[U], errorHandler: errorHandlerFn[V] )(
		implicit ut: ClassTag[U]
	): RDD[(K, U)] =
	{
		val attempted = rdd.mapValues( tryCatchResult( f, errorHandler ) )
		val succeeded = attempted.filter( entry => entry._2.isSuccess )
		succeeded.flatMapValues( outcome => outcome.getResult )
	}
}

/**
	* Companion holding the implicit conversion that adds the `try*` operations
	* of [[TryCatchPairRDDFunctions]] to any `RDD[(K, V)]`.
	*/
object TryCatchPairRDDFunctions
{
	/**
	 * Wraps a key/value RDD in a [[TryCatchPairRDDFunctions]].
	 *
	 * The result type is declared explicitly, as is recommended for implicit
	 * definitions. A key `Ordering` is picked up when one is implicitly
	 * available at the call site and forwarded to the wrapper; without this
	 * parameter the wrapper's `ord` could only ever resolve to its `null`
	 * default, because `K` is abstract at the point of construction.
	 *
	 * @param pairRDD the RDD to wrap
	 * @param kt      class tag of the key type
	 * @param vt      class tag of the value type
	 * @param ord     key ordering if one is in implicit scope, otherwise `null`
	 */
	implicit def pairRDDToTryCatchPairRDDFunctions[K, V]( pairRDD: RDD[(K, V)] )
	                                                   ( implicit kt: ClassTag[K],
	                                                     vt: ClassTag[V],
	                                                     ord: Ordering[K] = null ): TryCatchPairRDDFunctions[K, V] =
		new TryCatchPairRDDFunctions[K, V]( pairRDD )( kt, vt, ord )
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy