
com.signalcollect.AggregationOperations.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of signal-collect_2.11 Show documentation
Show all versions of signal-collect_2.11 Show documentation
A framework for parallel and distributed graph processing.
The newest version!
/*
* @author Philip Stutz
*
* Copyright 2011 University of Zurich
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.signalcollect
import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag
import scala.util.Sorting
import com.signalcollect.interfaces.AggregationOperation
import com.signalcollect.interfaces.ComplexAggregation
import com.signalcollect.interfaces.ModularAggregationOperation
/**
* Aggregator that executes two aggregation operations 'aggrA' and 'aggrB'
* at the same time and returns a tuple with the result of 'aggrA' as the
* first element and the result of 'aggrB' as the second element.
*/
case class MultiAggregator[ResultTypeA, ResultTypeB](
aggrA: ModularAggregationOperation[ResultTypeA],
aggrB: ModularAggregationOperation[ResultTypeB])
extends ModularAggregationOperation[(ResultTypeA, ResultTypeB)] {
def extract(v: Vertex[_, _, _, _]) = {
(aggrA.extract(v), aggrB.extract(v))
}
def aggregate(a: (ResultTypeA, ResultTypeB), b: (ResultTypeA, ResultTypeB)) = {
(aggrA.aggregate(a._1, b._1), aggrB.aggregate(a._2, b._2))
}
def neutralElement = {
(aggrA.neutralElement, aggrB.neutralElement)
}
}
/**
* Builds a map with the vertex ids as keys and the vertex states as values.
*
* Only works on graphs where this information fits into memory.
*/
case class IdStateMapAggregator[IdType, StateType]() extends ModularAggregationOperation[Map[IdType, StateType]] {
val neutralElement = Map[IdType, StateType]()
def extract(v: Vertex[_, _, _, _]): Map[IdType, StateType] = {
Map[IdType, StateType]((v.id.asInstanceOf[IdType], v.state.asInstanceOf[StateType]))
}
def aggregate(a: Map[IdType, StateType], b: Map[IdType, StateType]): Map[IdType, StateType] = a ++ b
}
/**
* Aggregation operation that sums up all the vertex states that have numeric type `N`.
*/
case class SumOfStates[N: Numeric: Manifest]() extends ReduceStatesOperation[N] {
val numeric = implicitly[Numeric[N]]
/**
* Sums up the values
*/
def operation(a: N, b: N): N = numeric.plus(a, b)
}
/**
* Aggregation operation that multiplies all the vertex states that have numeric type `N`.
*/
case class ProductOfStates[N: Numeric: Manifest]() extends ReduceStatesOperation[N] {
val numeric = implicitly[Numeric[N]]
/**
* Multiplies the values
*/
def operation(a: N, b: N): N = numeric.times(a, b)
}
/**
* Aggregation operation that returns a sample of all vertex ids.
*/
case class SampleVertexIds(sampleSize: Int) extends ModularAggregationOperation[List[Any]] {
val neutralElement = List[Any]()
def aggregate(a: List[Any], b: List[Any]): List[Any] = {
val combinedList = a ++ b
combinedList.slice(0, math.min(sampleSize, combinedList.size))
}
def extract(v: Vertex[_, _, _, _]): List[Any] = {
List(v.id)
}
}
/**
* Aggregation operation that counts the number of vertices in this graph that have type `VertexType`.
*
* @example `val numberOfPageRankVertices = graph.aggregate(new CountVertices[PageRankVertex])`
*/
case class CountVertices[VertexType <: Vertex[_, _, _, _]: ClassTag]() extends ModularAggregationOperation[Long] {
val neutralElement: Long = 0l
/**
* Sums up the number of vertices of type `VertexType` that were found.
*/
def aggregate(a: Long, b: Long): Long = a + b
/**
* Returns 1 for vertices that match `VertexType`, 0 for other types
*/
def extract(v: Vertex[_, _, _, _]): Long = {
v match {
case v: VertexType => 1l
case other => 0l
}
}
}
/**
* Extracts states from vertices
*/
abstract class StateExtractor[StateType: Manifest] extends ModularAggregationOperation[Option[StateType]] {
/**
* Extracts the state from v if it matches `StateType`
*/
def extract(v: Vertex[_, _, _, _]): Option[StateType] = {
try {
Some(v.state.asInstanceOf[StateType]) // not nice, but isAssignableFrom is slow and has nasty issues with boxed/unboxed
} catch {
case _: Throwable => None
}
}
}
/**
* Calculates the aggregate of all vertex states of type `ValueType` using the associative function `operation`.
*/
abstract class ReduceStatesOperation[ValueType: Manifest] extends StateExtractor[ValueType] {
def operation(a: ValueType, b: ValueType): ValueType
val neutralElement = None
def aggregate(a: Option[ValueType], b: Option[ValueType]): Option[ValueType] = {
(a, b) match {
case (Some(x), Some(y)) => Some(operation(x, y))
case (Some(x), None) => Some(x)
case (None, Some(y)) => Some(y)
case (None, None) => None
}
}
}
/**
* Implements an extractor for aggregation operations that will operate on states of type `StateType`.
*/
abstract class StateAggregator[StateType] extends ModularAggregationOperation[StateType] {
/**
* Extracts the state from v if it matches `StateType`
*/
def extract(v: Vertex[_, _, _, _]): StateType = {
try {
v.state.asInstanceOf[StateType] // not nice, but isAssignableFrom is slow and has nasty issues with boxed/unboxed
} catch {
case _: Throwable => neutralElement
}
}
}
/**
* Finds the ids and states of the K vertices with the largest states.
*/
case class TopKFinder[State](k: Int)(implicit ord: Ordering[State])
extends ComplexAggregation[Iterable[(_, State)], Iterable[(_, State)]] {
implicit val ordering = Ordering.by((value: (_, State)) => value._2)
def aggregationOnWorker(vertices: Stream[Vertex[_, _, _, _]]): Iterable[(_, State)] = {
def extract(v: Vertex[_, _, _, _]): (_, State) = {
v match {
case vertex: Vertex[_, State, _, _] => (vertex.id, vertex.state)
}
}
val threadName = Thread.currentThread.getName
val values = (vertices map (extract(_)))
selectTopK(k, values)
}
protected def selectTopK[G: ClassTag](k: Int, items: Stream[G])(implicit ord: Ordering[G]): Iterable[G] = {
val startTime = System.currentTimeMillis
var counter = 0
val topK = new PriorityQueue[G]()(ord.reverse)
for (item <- items) {
counter += 1
if (topK.size < k) {
topK += item
} else {
if (ord.compare(topK.head, item) < 0) {
topK.dequeue
topK += item
}
}
}
topK.toArray[G]
}
def aggregationOnCoordinator(workerResults: Iterable[Iterable[(_, State)]]): Iterable[(_, State)] = {
val startTime = System.currentTimeMillis
def timePassedInSeconds = System.currentTimeMillis - System.currentTimeMillis
val values = workerResults.toStream.flatMap(identity)
val topK = (selectTopK(k, values)).toArray
Sorting.quickSort[(_, State)](topK)(ordering.reverse)
topK
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy