
// com/signalcollect/console/AggregationOperation.scala
// Part of signal-collect_2.11, a framework for parallel and distributed graph processing.
/*
* @author Carol Alexandru
*
* Copyright 2013 University of Zurich
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.signalcollect.console
import scala.language.postfixOps
import com.signalcollect.DataGraphVertex
import com.signalcollect.interfaces.AggregationOperation
import com.signalcollect.interfaces.ModularAggregationOperation
import org.json4s._
import org.json4s.JsonDSL._
import com.signalcollect.TopKFinder
import com.signalcollect.Edge
import com.signalcollect.Vertex
import BreakConditionName._
/**
* Aggregator that loads a JObject representation of vertices and their edges.
*
* Given a set of vertex IDs, the aggregator retrieves the corresponding vertices and the
* edges between the vertices. The aggregator returns a JObject, which contains
* two objects, one for vertices, one for edges. The data structure is best
* explained by an example:
*
* {{{
* {"vertices":{"id1":{"s":"0.15","ss":0.0,"cs":1.0},
* "id2":{"s":"0.16","ss":1.0,"cs":1.0},
* "id3":{"s":"0.17","ss":1.0,"cs":1.0}},
* "edges":{"id1":["id2","id3"]}}}
* }}}
*
* The vertices object uses a vertex id as key and stores the state, signal and
* collect scores. The edges object uses a vertex id as key and stores the list
* of target vertices.
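*
* For illustration only: assuming the aggregation is executed through a
* Signal/Collect graph's `aggregate` method (the `graph` value is not defined
* in this file), a call might look like this:
*
* {{{
* val (minState, maxState, json) =
*   graph.aggregate(new GraphAggregator[Any](Set("id1", "id2", "id3")))
* }}}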
*
* @constructor create the aggregator
* @param vertexIds set of vertex IDs to be loaded
* @param exposeVertices whether or not to call expose() on each vertex and put
* the return value into the "info" property. If true, may increase
* graph loading time significantly.
*/
class GraphAggregator[Id](
vertexIds: Set[Id] = Set[Id](),
exposeVertices: Boolean = false)
extends AggregationOperation[(Double, Double, JObject)] {
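// Coerce a numeric vertex state to Double; non-numeric states are treated as 0.0.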
def interpretState(s: Any): Double = {
s match {
case x: Double => x
case x: Int => x.toDouble
case x: Long => x.toDouble
case x: Float => x.toDouble
case otherwise => 0.0
}
}
def extract(v: Vertex[_, _, _, _]): (Double, Double, JObject) = {
def state: Double = interpretState(v.state)
try {
if (vertexIds.contains(v.id.asInstanceOf[Id])) {
// Get the list of target vertices that this vertex's edges point at
val targetVertices = v.targetIds
.filter { targetId => vertexIds.contains(targetId.asInstanceOf[Id]) }
.map { targetId => JString(targetId.toString) }
.toList
val stateString = v.state match {
case null => "null"
case other => other.toString
}
def vertexProperties: List[JField] = (List(JField("s", stateString),
JField("es", targetVertices.size),
JField("ss", v.scoreSignal),
JField("cs", v.scoreCollect),
JField("t", v.getClass.toString.split("""\.""").last),
JField("info",
if (exposeVertices) {
JObject(
(for ((k, v) <- v.expose) yield {
JField(k, Toolkit.serializeAny(v))
}).toList)
} else { JNothing })))
def verticesObj: (String, JObject) = ("vertices",
JObject(List(JField(v.id.toString, JObject(vertexProperties)))))
def edgesObj: (String, JObject) = ("edges", JObject(List(JField(v.id.toString, JArray(targetVertices)))))
(state, state, verticesObj ~ edgesObj)
} else { (state, state, JObject(List())) }
} catch {
case t: Throwable =>
println(t.getMessage)
t.printStackTrace
throw t
}
}
def reduce(subGraphs: Stream[(Double, Double, JObject)]): (Double, Double, JObject) = {
// Determine the lowest and highest state and merge the sub-graphs
subGraphs.size match {
case 0 => (0.0, 0.0, JObject(List()))
case otherwise =>
subGraphs.foldLeft((subGraphs.head._1, subGraphs.head._2, JObject(List()))) { (acc, v) =>
(if (acc._1 < v._1) acc._1 else v._1,
if (acc._2 > v._2) acc._2 else v._2,
acc._3 merge v._3)
}
}
}
}
/**
* Aggregator that retrieves a random sample of vertex IDs.
*
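* For illustration only (again assuming a `graph.aggregate` entry point, which
* is not part of this file):
*
* {{{
* val sampledIds: Set[Any] = graph.aggregate(new SampleAggregator[Any](100))
* }}}
*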
* @constructor create the aggregator
* @param sampleSize the number of vertex IDs to retrieve
*/
class SampleAggregator[Id](sampleSize: Int)
extends ModularAggregationOperation[Set[Id]] {
val neutralElement = Set[Id]()
def aggregate(a: Set[Id], b: Set[Id]): Set[Id] = {
val combinedSet = a ++ b
combinedSet.slice(0, math.min(sampleSize, combinedSet.size)).toSet
}
def extract(v: Vertex[_, _, _, _]): Set[Id] = {
Set(v.id.asInstanceOf[Id])
}
}
/**
* Aggregator that retrieves vertices with the highest degree.
*
* The aggregator produces a map of vertex IDs to degrees.
*
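* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* val degrees: Map[Any, Int] = graph.aggregate(new TopDegreeAggregator[Any](10))
* }}}
*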
* @constructor create the aggregator
* @param n the number of top elements to find
*/
class TopDegreeAggregator[Id](n: Int)
extends AggregationOperation[Map[Id, Int]] {
def extract(v: Vertex[_, _, _, _]): Map[Id, Int] = {
// Map this vertex's id to its out-degree and each target id to 1;
// reduce then sums these contributions into the total degree per vertex.
Map(v.id.asInstanceOf[Id] -> v.targetIds.size) ++
v.targetIds.map { _.asInstanceOf[Id] -> 1 }.toMap
}
def reduce(vertexToDegreeMap: Stream[Map[Id, Int]]): Map[Id, Int] = {
// Combine the maps created above to count the total number of edges
Toolkit.mergeMaps(vertexToDegreeMap)((v1, v2) => v1 + v2)
}
}
/**
* Aggregator that retrieves vertices with the highest or lowest state.
*
* The aggregator produces a list of tuples, each containing the state and the
* vertex ID with that state.
*
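* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* // the ten vertices with the highest numeric state
* val top: List[(Double, Any)] =
*   graph.aggregate(new TopStateAggregator[Any](10, inverted = false))
* }}}
*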
* @constructor create the aggregator
* @param n the number of top elements to find
* @param inverted gather by lowest, not highest state
*/
class TopStateAggregator[Id](n: Int, inverted: Boolean)
extends AggregationOperation[List[(Double, Id)]] {
def extract(v: Vertex[_, _, _, _]): List[(Double, Id)] = {
// Try to interpret different types of numeric states
val state: Option[Double] = v.state match {
case x: Double => Some(x)
case x: Int => Some(x.toDouble)
case x: Long => Some(x.toDouble)
case x: Float => Some(x.toDouble)
case otherwise => None
}
state match {
case Some(number) =>
List[(Double, Id)]((number, v.id.asInstanceOf[Id]))
case otherwise => List[(Double, Id)]()
}
}
def reduce(statesAndIds: Stream[List[(Double, Id)]]): List[(Double, Id)] = {
// Sort the tuples by descending/ascending value and take the first n tuples
statesAndIds.foldLeft(List[(Double, Id)]()) { (acc, n) => acc ++ n }
.sortWith({ (t1, t2) =>
if (inverted) { t1._1 < t2._1 }
else { t1._1 > t2._1 }
})
.take(n)
}
}
/**
* Aggregator that retrieves vertices whose signal or collect score lies above a threshold.
*
* The aggregator produces a list of tuples, each containing the score and the
* vertex ID with that score.
*
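* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* // up to ten vertices whose signal score exceeds 0.01
* val aboveThreshold: List[(Double, Any)] =
*   graph.aggregate(new AboveThresholdAggregator[Any](10, "signal", 0.01))
* }}}
*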
* @constructor create the aggregator
* @param n the number of top elements to find
* @param scoreType the score to look at ("signal" or "collect")
* @param threshold the threshold a score must exceed for the vertex to be included
*/
class AboveThresholdAggregator[Id](n: Int, scoreType: String, threshold: Double)
extends AggregationOperation[List[(Double, Id)]] {
def extract(v: Vertex[_, _, _, _]): List[(Double, Id)] = {
val score = scoreType match {
case "signal" => v.scoreSignal
case "collect" => v.scoreCollect
}
// Yield score and id only if the score is above the threshold
if (score > threshold) {
List[(Double, Id)]((score, v.id.asInstanceOf[Id]))
} else {
List[(Double, Id)]()
}
}
def reduce(thresholdsAndIds: Stream[List[(Double, Id)]]): List[(Double, Id)] = {
// Sort the tuples by descending score and take the first n tuples
thresholdsAndIds.foldLeft(List[(Double, Id)]()) { (acc, n) => acc ++ n }
.sortWith({ (t1, t2) => t1._1 > t2._1 })
.take(n)
}
}
/**
* Aggregator that loads the IDs of vertices in the vicinity of other vertices.
*
* The aggregator produces a new set of IDs representing the vertices that are
* connected to any of the vertices in the given set, whether by an incoming or an outgoing edge.
*
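* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* val vicinityIds: Set[Any] =
*   graph.aggregate(new FindVertexVicinitiesByIdsAggregator[Any](Set("id1", "id2")))
* }}}
*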
* @constructor create the aggregator
* @param ids set of vertex IDs whose vicinity is to be determined
*/
class FindVertexVicinitiesByIdsAggregator[Id](ids: Set[Id])
extends AggregationOperation[Set[Id]] {
def extract(v: Vertex[_, _, _, _]): Set[Id] = {
if (v.targetIds.exists(targetId => ids.contains(targetId.asInstanceOf[Id]))) {
// If this vertex has an outgoing edge to a primary vertex, it is a vicinity vertex.
Set(v.id.asInstanceOf[Id])
} else if (ids.contains(v.id.asInstanceOf[Id])) {
// If this vertex is a primary vertex, all its targets are vicinity vertices
v.targetIds.asInstanceOf[Traversable[Id]].toSet
} else {
// If neither is true, this vertex is irrelevant
Set()
}
}
def reduce(vertexIds: Stream[Set[Id]]): Set[Id] = {
vertexIds.toSet.flatten
}
}
/**
* Aggregator that resolves a list of ID strings to the corresponding vertices.
*
* The aggregator compares the string representation of the id of any vertex
* to the strings supplied to it.
*
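* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* val matchedVertices =
*   graph.aggregate(new FindVerticesByIdsAggregator[Any](List("id1", "id2")))
* }}}
*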
* @constructor create the aggregator
* @param idsList the list of IDs to compare vertex IDs with
*/
class FindVerticesByIdsAggregator[Id](idsList: List[String])
extends AggregationOperation[List[Vertex[Id, _, _, _]]] {
def ids: Set[String] = idsList.toSet
def extract(v: Vertex[_, _, _, _]): List[Vertex[Id, _, _, _]] = {
if (ids.contains(v.id.toString)) { List(v.asInstanceOf[Vertex[Id, _, _, _]]) }
else { List() }
}
def reduce(vertices: Stream[List[Vertex[Id, _, _, _]]]): List[Vertex[Id, _, _, _]] = {
vertices.toList.flatten
}
}
/**
* Aggregator that finds vertex IDs whose string representation contains a given substring.
*
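* A usage sketch under the same `graph.aggregate` assumption:
*
* {{{
* // at most 20 vertex ids whose string representation contains "user"
* val matchingIds: Set[Any] =
*   graph.aggregate(new FindVertexIdsBySubstringAggregator[Any]("user", 20))
* }}}
*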
* @constructor create the aggregator
* @param s the substring that should be contained in the vertex ID
* @param limit maximum number of vertex IDs to find
*/
class FindVertexIdsBySubstringAggregator[Id](s: String, limit: Int)
extends ModularAggregationOperation[Set[Id]] {
val neutralElement = Set[Id]()
def extract(v: Vertex[_, _, _, _]): Set[Id] = {
if (v.id.toString.contains(s)) { Set(v.id.asInstanceOf[Id]) }
else { Set() }
}
def aggregate(a: Set[Id], b: Set[Id]): Set[Id] = {
val combinedSet = a ++ b
combinedSet.slice(0, math.min(limit, combinedSet.size)).toSet
}
}
/**
* Aggregator that checks whether any of the break conditions apply.
*
* The aggregator takes a map of IDs (strings used to identify break
* conditions) to BreakCondition items. It produces a map of the same IDs to
* strings which represent the reason for the condition firing. For example,
* one result item may be ("3" -> "0.15"), which would mean that the condition
* identified as "3" fired because of a value "0.15". Depending on the state,
* not all condition checks are performed. For example, the signal score
* threshold is only ever checked before the signaling step (i.e. after the
* collect step), because otherwise the score would be 0 anyway.
*
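* A usage sketch, assuming the same `graph.aggregate` entry point and a
* condition map `myConditions: Map[String, BreakCondition]` built elsewhere:
*
* {{{
* val fired: Map[String, String] =
*   graph.aggregate(new BreakConditionsAggregator(myConditions, "checksAfterCollect"))
* // e.g. Map("3" -> "0.15") if condition "3" fired on the value 0.15
* }}}
*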
* @constructor create the aggregator
* @param conditions map of conditions
* @param state string denoting the current state ("checksAfterSignal" or "checksAfterCollect")
*/
class BreakConditionsAggregator(conditions: Map[String, BreakCondition], state: String)
extends AggregationOperation[Map[String, String]] {
val relevantVertexIds = conditions.map(_._2.props("vertexId")).toSet
def extract(v: Vertex[_, _, _, _]): Map[String, String] = {
var results = Map[String, String]()
if (relevantVertexIds.contains(v.id.toString)) {
conditions.foreach {
case (id, c) =>
if (v.id.toString == c.props("vertexId")) {
// It depends on the state which checks are performed, because
// some checks would falsely return true during some states. For
// example, checking the signal scores right after a signal step
// would yield a value below the threshold every time. In this
// case, we only check the signal scores after a collect step.
state match {
case "checksAfterSignal" => c.name match {
case CollectScoreBelowThreshold =>
if (v.scoreCollect < c.props("collectThreshold").toDouble) {
results += (id -> v.scoreCollect.toString)
}
case CollectScoreAboveThreshold =>
if (v.scoreCollect > c.props("collectThreshold").toDouble) {
results += (id -> v.scoreCollect.toString)
}
case otherwise =>
}
case "checksAfterCollect" => c.name match {
case StateChanges =>
if (v.state.toString != c.props("currentState")) {
results += (id -> v.state.toString)
}
case StateAbove =>
if (v.state.toString.toDouble > c.props("expectedState").toDouble) {
results += (id -> v.state.toString)
}
case StateBelow =>
if (v.state.toString.toDouble < c.props("expectedState").toDouble) {
results += (id -> v.state.toString)
}
case SignalScoreBelowThreshold =>
if (v.scoreSignal < c.props("signalThreshold").toDouble) {
results += (id -> v.scoreSignal.toString)
}
case SignalScoreAboveThreshold =>
if (v.scoreSignal > c.props("signalThreshold").toDouble) {
results += (id -> v.scoreSignal.toString)
}
case otherwise =>
}
}
}
}
}
results
}
def reduce(results: Stream[Map[String, String]]): Map[String, String] = {
Toolkit.mergeMaps(results.toList)((v1, v2) => v1 + v2)
}
}