All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.memeticlabs.spark.rdd.trycatch.TryCatchRDDFunctions.scala Maven / Gradle / Ivy

The newest version!
/**
	* Copyright 2017 Tristan Nixon
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	* Created by Tristan Nixon on 6/28/17.
	*/
package org.memeticlabs.spark.rdd.trycatch

import scala.Function._
import scala.language.implicitConversions
import scala.reflect.ClassTag

import org.apache.spark.{HashPartitioner, Partitioner}
import org.apache.spark.Partitioner._
import org.apache.spark.rdd.RDD

import org.memeticlabs.spark.rdd.trycatch.TryCatchHelpers._

/**
	* Error-tolerant counterparts of common [[org.apache.spark.rdd.RDD]] transformations and actions.
	*
	* Every method takes the user function together with an `errorHandler` and forwards both to
	* the helpers imported from `TryCatchHelpers` (`tryCatchResult` / `tryCatchAndHandle`).
	* NOTE(review): those helpers are defined outside this file; presumably they run the function
	* inside a try/catch and hand failures to the handler -- confirm against `TryCatchHelpers`.
	*
	* @param rdd the RDD every operation below is applied to
	* @param tt  class tag of the element type
	* @tparam T  element type of the wrapped RDD
	*/
private[memeticlabs] class TryCatchRDDFunctions[T]( rdd: RDD[T] )( implicit tt: ClassTag[T] )
	extends Serializable
{
	/**
		* Runs `f` over every element through `tryCatchResult` and keeps only the outcomes
		* that report success.
		*
		* @param f            function applied to each element
		* @param errorHandler handler forwarded to `tryCatchResult` together with `f`
		* @return the successful outcomes; each one exposes its input and its result
		*/
	private def trySuccess[U: ClassTag]( f: T => U, errorHandler: errorHandlerFn[T] ): RDD[TryCatchResultWithInput[T, U]] =
	{
		val attempted = rdd.map( tryCatchResult( f, errorHandler ) )
		attempted.filter( outcome => outcome.isSuccess )
	}

	// ---- Transformations (each returns a new RDD) ----

	/**
		* `map` variant: yields the result of `f` for every element whose outcome was successful.
		*
		* @param f            mapping function
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryMap[U: ClassTag]( f: T => U, errorHandler: errorHandlerFn[T] ): RDD[U] =
	{
		val succeeded = trySuccess( f, errorHandler )
		succeeded.map( outcome => outcome.getResult )
	}

	/**
		* `flatMap` variant: flattens the collections produced by `f` for the elements whose
		* outcome was successful.
		*
		* @param f            function producing zero or more outputs per element
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryFlatMap[U: ClassTag]( f: T => TraversableOnce[U], errorHandler: errorHandlerFn[T] ): RDD[U] =
	{
		val succeeded = trySuccess( f, errorHandler )
		succeeded.flatMap( outcome => outcome.getResult )
	}

	/**
		* `filter` variant: returns the original elements for which `f` succeeded and
		* evaluated to `true`.
		*
		* @param f            predicate
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryFilter( f: ( T ) => Boolean, errorHandler: errorHandlerFn[T] ): RDD[T] =
	{
		val accepted = trySuccess( f, errorHandler ).filter( outcome => outcome.getResult )
		accepted.map( outcome => outcome.getInput )
	}

	/**
		* `keyBy` variant: pairs every successfully keyed element with the key computed by `f`.
		*
		* @param f            key-extraction function
		* @param errorHandler handler forwarded alongside `f`
		* @return `(key, element)` pairs for the successful outcomes
		*/
	def tryKeyBy[K]( f: T => K, errorHandler: errorHandlerFn[T] )( implicit kt: ClassTag[K] ): RDD[(K, T)] =
	{
		val succeeded = trySuccess( f, errorHandler )
		succeeded.keyBy( outcome => outcome.getResult ).mapValues( outcome => outcome.getInput )
	}

	/**
		* `groupBy` variant that partitions with `defaultPartitioner(rdd)`.
		*
		* @param f            key-extraction function
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryGroupBy[K]( f: T => K, errorHandler: errorHandlerFn[T] )( implicit kt: ClassTag[K] ): RDD[(K, Iterable[T])] =
	{
		val partitioner: Partitioner = defaultPartitioner( rdd )
		tryGroupBy( f, partitioner, errorHandler )
	}

	/**
		* `groupBy` variant that partitions with a `HashPartitioner` of `numPartitions` partitions.
		*
		* @param f             key-extraction function
		* @param numPartitions number of partitions given to the `HashPartitioner`
		* @param errorHandler  handler forwarded alongside `f`
		*/
	def tryGroupBy[K]( f: T => K, numPartitions: Int, errorHandler: errorHandlerFn[T] )( implicit kt: ClassTag[K] ): RDD[(K, Iterable[T])] =
	{
		val partitioner: Partitioner = new HashPartitioner( numPartitions )
		tryGroupBy( f, partitioner, errorHandler )
	}

	/**
		* `groupBy` variant with an explicit partitioner: keys every successful element with
		* the result of `f` and groups the elements by that key.
		*
		* @param f            key-extraction function
		* @param p            partitioner handed to `groupByKey`
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryGroupBy[K]( f: T => K, p: Partitioner, errorHandler: errorHandlerFn[T] )
	                 ( implicit kt: ClassTag[K], ord: Ordering[K] = null ): RDD[(K, Iterable[T])] =
	{
		val keyed = trySuccess( f, errorHandler ).map( outcome => ( outcome.getResult, outcome.getInput ) )
		keyed.groupByKey( p )
	}

	/**
		* `mapPartitions` variant: the whole-partition function `f` is wrapped by
		* `tryCatchAndHandle`, which also receives an empty iterator as its second argument.
		* NOTE(review): presumably that empty iterator is what a failing partition yields --
		* confirm in `TryCatchHelpers`.
		* NOTE(review): if `f` returns a lazy iterator, errors raised while it is consumed
		* likely occur outside the wrapped call -- verify.
		*
		* @param f                     function over a whole partition
		* @param errorHandler          handler forwarded alongside `f`
		* @param preservesPartitioning passed straight through to `RDD.mapPartitions`
		*/
	def tryMapPartitions[U: ClassTag]( f: Iterator[T] => Iterator[U],
	                                   errorHandler: errorHandlerFn[Iterator[T]],
	                                   preservesPartitioning: Boolean = false ): RDD[U] =
	{
		val guarded: Iterator[T] => Iterator[U] = tryCatchAndHandle( f, Iterator[U](), errorHandler )
		rdd.mapPartitions( guarded, preservesPartitioning )
	}

	/**
		* `mapPartitionsWithIndex` variant: `f` is tupled, wrapped by `tryCatchAndHandle`
		* (with an empty iterator as second argument) and untupled again before being
		* handed to Spark.
		*
		* @param f                     function over (partition index, partition contents)
		* @param errorHandler          handler forwarded alongside the tupled `f`
		* @param preservesPartitioning passed straight through to `RDD.mapPartitionsWithIndex`
		*/
	def tryMapPartitionsWithIndex[U: ClassTag]( f: (Int, Iterator[T]) => Iterator[U],
	                                            errorHandler: errorHandlerFn[(Int, Iterator[T])],
	                                            preservesPartitioning: Boolean = false ): RDD[U] =
	{
		val guarded: (Int, Iterator[T]) => Iterator[U] =
			untupled( tryCatchAndHandle( f.tupled, Iterator[U](), errorHandler ) )
		rdd.mapPartitionsWithIndex[U]( guarded, preservesPartitioning )
	}

	/**
		* `zipPartitions` variant for one additional RDD. `f` is tupled, wrapped by
		* `tryCatchAndHandle` (with an empty iterator as second argument) and untupled again.
		*
		* @param rdd2                  RDD zipped with this one
		* @param preservesPartitioning passed straight through to `RDD.zipPartitions`
		* @param f                     function over the paired partition iterators
		* @param errorHandler          handler forwarded alongside the tupled `f`
		*/
	def tryZipPartitions[B: ClassTag, V: ClassTag]( rdd2: RDD[B], preservesPartitioning: Boolean )
	                                              ( f: (Iterator[T], Iterator[B]) => Iterator[V],
	                                                errorHandler: errorHandlerFn[(Iterator[T], Iterator[B])] ): RDD[V] =
	{
		val guarded: (Iterator[T], Iterator[B]) => Iterator[V] =
			untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) )
		rdd.zipPartitions( rdd2, preservesPartitioning )( guarded )
	}

	/**
		* `zipPartitions` variant for two additional RDDs; wrapping is the same as in the
		* single-RDD overload.
		*
		* @param rdd2                  first RDD zipped with this one
		* @param rdd3                  second RDD zipped with this one
		* @param preservesPartitioning passed straight through to `RDD.zipPartitions`
		* @param f                     function over the three partition iterators
		* @param errorHandler          handler forwarded alongside the tupled `f`
		*/
	def tryZipPartitions[B: ClassTag, C: ClassTag, V: ClassTag]( rdd2: RDD[B],
	                                                             rdd3: RDD[C],
	                                                             preservesPartitioning: Boolean )
	                                                           ( f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V],
	                                                             errorHandler: errorHandlerFn[(Iterator[T], Iterator[B], Iterator[C])] ): RDD[V] =
	{
		val guarded: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V] =
			untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) )
		rdd.zipPartitions( rdd2, rdd3, preservesPartitioning )( guarded )
	}

	/**
		* `zipPartitions` variant for three additional RDDs; wrapping is the same as in the
		* single-RDD overload.
		*
		* @param rdd2                  first RDD zipped with this one
		* @param rdd3                  second RDD zipped with this one
		* @param rdd4                  third RDD zipped with this one
		* @param preservesPartitioning passed straight through to `RDD.zipPartitions`
		* @param f                     function over the four partition iterators
		* @param errorHandler          handler forwarded alongside the tupled `f`
		*/
	def tryZipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag]( rdd2: RDD[B],
	                                                                          rdd3: RDD[C],
	                                                                          rdd4: RDD[D],
	                                                                          preservesPartitioning: Boolean )
	                                                                        ( f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
	                                                                          errorHandler: errorHandlerFn[(Iterator[T], Iterator[B], Iterator[C], Iterator[D])] ): RDD[V] =
	{
		val guarded: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V] =
			untupled( tryCatchAndHandle( f.tupled, Iterator[V](), errorHandler ) )
		rdd.zipPartitions( rdd2, rdd3, rdd4, preservesPartitioning )( guarded )
	}

	// ---- Actions ----

	/**
		* `foreach` variant: runs `f`, wrapped by `tryCatchAndHandle`, on every element.
		*
		* @param f            side-effecting function
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryForeach( f: T => Unit, errorHandler: errorHandlerFn[T] ): Unit =
	{
		val guarded: T => Unit = tryCatchAndHandle( f, errorHandler )
		rdd.foreach( guarded )
	}

	/**
		* `foreachPartition` variant: runs `f`, wrapped by `tryCatchAndHandle`, on every partition.
		*
		* @param f            side-effecting function over a whole partition
		* @param errorHandler handler forwarded alongside `f`
		*/
	def tryForeachPartition( f: Iterator[T] => Unit, errorHandler: errorHandlerFn[Iterator[T]] ): Unit =
	{
		val guarded: Iterator[T] => Unit = tryCatchAndHandle( f, errorHandler )
		rdd.foreachPartition( guarded )
	}

	/**
		* `reduce` variant. `zeroValue` is handed to `tryCatchAndHandle` together with the
		* tupled `f`.
		* NOTE(review): presumably `zeroValue` is what a failing combination yields -- confirm
		* in `TryCatchHelpers`.
		*
		* @param zeroValue    value forwarded to `tryCatchAndHandle` as its second argument
		* @param f            binary reduction function
		* @param errorHandler handler forwarded alongside the tupled `f`
		*/
	def tryReduce( zeroValue: T )( f: (T, T) => T, errorHandler: errorHandlerFn[(T, T)] ): T =
	{
		val guarded: (T, T) => T = untupled( tryCatchAndHandle( f.tupled, zeroValue, errorHandler ) )
		rdd.reduce( guarded )
	}

	/**
		* `treeReduce` variant; wrapping is the same as in `tryReduce`.
		*
		* Because `depth` has a default but precedes the required `errorHandler`, the default
		* is only usable when `errorHandler` is passed as a named argument.
		*
		* @param zeroValue    value forwarded to `tryCatchAndHandle` as its second argument
		* @param f            binary reduction function
		* @param depth        passed straight through to `RDD.treeReduce`
		* @param errorHandler handler forwarded alongside the tupled `f`
		*/
	def tryTreeReduce( zeroValue: T )( f: (T, T) => T, depth: Int = 2, errorHandler: errorHandlerFn[(T, T)] ): T =
	{
		val guarded: (T, T) => T = untupled( tryCatchAndHandle( f.tupled, zeroValue, errorHandler ) )
		rdd.treeReduce( guarded, depth )
	}

	/**
		* `fold` variant. `zeroValue` serves both as the zero of the fold and as the value
		* forwarded to `tryCatchAndHandle` together with the tupled `op`.
		*
		* @param zeroValue    zero element of the fold
		* @param op           binary fold function
		* @param errorHandler handler forwarded alongside the tupled `op`
		*/
	def tryFold( zeroValue: T )( op: (T, T) => T, errorHandler: errorHandlerFn[(T, T)] ): T =
	{
		val guarded: (T, T) => T = untupled( tryCatchAndHandle( op.tupled, zeroValue, errorHandler ) )
		rdd.fold( zeroValue )( guarded )
	}

	/**
		* `aggregate` variant. The sequence and combine operators are wrapped separately,
		* each with its own handler; both wrappers receive `zeroValue` as second argument.
		*
		* @param zeroValue        zero element of the aggregation
		* @param seqOp            folds one element into an accumulator
		* @param seqErrorHandler  handler forwarded alongside the tupled `seqOp`
		* @param combOp           merges two accumulators
		* @param combErrorHandler handler forwarded alongside the tupled `combOp`
		*/
	def tryAggregate[U: ClassTag]( zeroValue: U )( seqOp: (U, T) => U,
	                                               seqErrorHandler: errorHandlerFn[(U, T)],
	                                               combOp: (U, U) => U,
	                                               combErrorHandler: errorHandlerFn[(U, U)] ): U =
	{
		val guardedSeq = untupled( tryCatchAndHandle( seqOp.tupled, zeroValue, seqErrorHandler ) )
		val guardedComb = untupled( tryCatchAndHandle( combOp.tupled, zeroValue, combErrorHandler ) )
		rdd.aggregate( zeroValue )( guardedSeq, guardedComb )
	}

	/**
		* `treeAggregate` variant; wrapping is the same as in `tryAggregate`.
		*
		* @param zeroValue        zero element of the aggregation
		* @param seqOp            folds one element into an accumulator
		* @param seqErrorHandler  handler forwarded alongside the tupled `seqOp`
		* @param combOp           merges two accumulators
		* @param combErrorHandler handler forwarded alongside the tupled `combOp`
		* @param depth            passed straight through to `RDD.treeAggregate`
		*/
	def tryTreeAggregate[U: ClassTag]( zeroValue: U )( seqOp: (U, T) => U,
	                                                   seqErrorHandler: errorHandlerFn[(U, T)],
	                                                   combOp: (U, U) => U,
	                                                   combErrorHandler: errorHandlerFn[(U, U)],
	                                                   depth: Int = 2 ): U =
	{
		val guardedSeq = untupled( tryCatchAndHandle( seqOp.tupled, zeroValue, seqErrorHandler ) )
		val guardedComb = untupled( tryCatchAndHandle( combOp.tupled, zeroValue, combErrorHandler ) )
		rdd.treeAggregate( zeroValue )( guardedSeq, guardedComb, depth )
	}
}

object TryCatchRDDFunctions
{
	/**
		* Implicit conversion that wraps an RDD in [[TryCatchRDDFunctions]], so the `try*`
		* methods can be called directly on the RDD once this conversion is in scope.
		*
		* @param rdd the RDD to wrap
		* @param tt  class tag of the element type, passed on to the wrapper
		*/
	implicit def rddToTryCatchRDDFunctions[T]( rdd: RDD[T] )( implicit tt: ClassTag[T] ): TryCatchRDDFunctions[T] =
		new TryCatchRDDFunctions[T]( rdd )( tt )
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy