// All downloads are free. Search and download functionality uses the official Maven repository.

// eu.stratosphere.examples.scala.graph.ConnectedComponents.scala (Maven / Gradle / Ivy)

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.examples.scala.graph;

import eu.stratosphere.client.LocalExecutor
import eu.stratosphere.api.common.Program
import eu.stratosphere.api.common.ProgramDescription
import eu.stratosphere.api.scala.analysis.GlobalSchemaPrinter
import eu.stratosphere.api.common.operators.DeltaIteration
import scala.math._
import eu.stratosphere.api.scala._
import eu.stratosphere.api.scala.operators._
import eu.stratosphere.api.common.Plan

/**
 * Command-line entry point that builds the connected-components plan and runs it
 * with the local executor.
 */
object RunConnectedComponents {

  /**
   * Parses the command line, builds the plan and executes it locally.
   *
   * The options `vertices`, `edges` and `output` are looked up in the result of
   * `Args.parse`. When fewer than three raw arguments are given, a usage line is
   * printed and nothing is executed.
   *
   * @param pArgs raw command-line arguments
   */
  def main(pArgs: Array[String]): Unit = {
    if (pArgs.size < 3) {
      // NOTE(review): the usage text looks like it lost its value placeholders
      // (e.g. "<file>") somewhere along the way -- confirm the intended wording.
      println("usage: -vertices  -edges  -output ")
    } else {
      val args = Args.parse(pArgs)
      // Go through getScalaPlan rather than getPlan: getPlan interprets its first
      // positional argument as the degree of parallelism and reads further positional
      // arguments after it, so handing it just the three paths fails at runtime.
      val plan = new ConnectedComponents()
        .getScalaPlan(args("vertices"), args("edges"), args("output"))
      LocalExecutor.execute(plan)
      System.exit(0)
    }
  }
}

/**
 * Connected-components example program.
 *
 * Every vertex starts in a component labelled with its own id (see `parseVertex`).
 * In each iteration step a vertex adopts the smallest component label offered by one
 * of its neighbours, if that label is smaller than its current one. Edges are treated
 * as undirected.
 */
class ConnectedComponents extends Program with Serializable {

  /**
   * Builds the plan from positional arguments.
   *
   * Expected arguments, in order:
   *  0. default parallelism (integer)
   *  1. vertices input path
   *  2. edges input path
   *  3. components output path
   *  4. maximum number of iterations (integer, optional; falls back to the default of
   *     `getScalaPlan` when omitted)
   *
   * @param args positional arguments as described above
   * @return the plan with its default parallelism set
   * @throws IllegalArgumentException if fewer than four arguments are supplied
   * @throws NumberFormatException    if a numeric argument is not a valid integer
   */
  override def getPlan(args: String*) = {
    require(
      args.length >= 4,
      "expected arguments: <parallelism> <vertices> <edges> <output> [<maxIterations>], got " +
        args.length)

    // Only pass maxIterations along when it was actually given, so that the default
    // declared on getScalaPlan stays the single source of truth.
    val plan =
      if (args.length > 4) getScalaPlan(args(1), args(2), args(3), args(4).toInt)
      else getScalaPlan(args(1), args(2), args(3))
    plan.setDefaultParallelism(args(0).toInt)
    plan
  }

  /**
   * Assembles the connected-components data flow.
   *
   * As a side effect the schema of the resulting plan is printed via
   * `GlobalSchemaPrinter.printSchema`.
   *
   * @param verticesInput    location of the vertex file, one vertex id per record
   * @param edgesInput       location of the edge file, one `from to` pair per record
   * @param componentsOutput location the `vertex component` records are written to
   * @param maxIterations    iteration bound handed to `iterateWithDelta`
   * @return the plan named "Connected Components" with the output sink as its only sink
   */
  def getScalaPlan(verticesInput: String, edgesInput: String, componentsOutput: String, maxIterations: Int = 10) = {

    val vertices = DataSource(verticesInput, DelimitedInputFormat(parseVertex))
    val directedEdges = DataSource(edgesInput, DelimitedInputFormat(parseEdge))

    // Emit every edge in both directions so that labels can travel either way.
    val undirectedEdges = directedEdges flatMap { case (from, to) => Seq(from -> to, to -> from) }

    // Step function of the delta iteration: `s` is the solution set, `ws` the workset,
    // both holding (vertex, component) pairs.
    def propagateComponent(s: DataSet[(Int, Int)], ws: DataSet[(Int, Int)]) = {

      // Offer the component of every workset vertex to each of its neighbours.
      val allNeighbors = ws join undirectedEdges where { case (v, _) => v } isEqualTo { case (from, _) => from } map { (w, e) => e._2 -> w._2 }
      // Keep only the smallest component offered to each target vertex.
      val minNeighbors = allNeighbors groupBy { case (to, _) => to } reduceGroup { cs => cs minBy { _._2 } }

      // updated solution elements == new workset
      // The first join input is the current solution entry, the second the best offer.
      val s1 = s join minNeighbors where { _._1 } isEqualTo { _._1 } flatMap { (current, candidate) =>
        (current, candidate) match {
          case ((v, cOld), (_, cNew)) if cNew < cOld => Some((v, cNew))
          case _ => None
        }
      }
//      s1.left preserves({ case (v, _) => v }, { case (v, _) => v })
      // NOTE(review): presumably an optimizer hint about fields forwarded unchanged
      // from the right join input -- confirm against the API documentation.
      s1.right preserves({ v=>v }, { v=>v })

      (s1, s1)
    }

    val components = vertices.iterateWithDelta(vertices, { _._1 }, propagateComponent, maxIterations)
    val output = components.write(componentsOutput, DelimitedOutputFormat(formatOutput.tupled))

    vertices.avgBytesPerRecord(8)
    directedEdges.avgBytesPerRecord(8)
    undirectedEdges.avgBytesPerRecord(8).avgRecordsEmittedPerCall(2)

    val plan = new ScalaPlan(Seq(output), "Connected Components")
    GlobalSchemaPrinter.printSchema(plan)
    plan
  }

  /**
   * Parser for a vertex record: the line is read as an integer id and paired with
   * itself, i.e. every vertex initially forms its own component.
   */
  def parseVertex = (line: String) => { val v = line.toInt; v -> v }

  /** Layout of an edge record: two non-negative integers separated by one space. */
  val EdgeInputPattern = """(\d+) (\d+)""".r

  /**
   * Parser for an edge record, yielding a `(from, to)` pair.
   *
   * A line that does not match `EdgeInputPattern` is rejected with an
   * `IllegalArgumentException` naming the offending line, instead of surfacing as a
   * bare `MatchError`.
   */
  def parseEdge = (line: String) => line match {
    case EdgeInputPattern(from, to) => from.toInt -> to.toInt
    case _ =>
      throw new IllegalArgumentException(
        "Malformed edge record, expected \"<from> <to>\" but got: " + line)
  }

  /** Formatter for a result record: `vertex component`, separated by one space. */
  def formatOutput = (vertex: Int, component: Int) => "%d %d".format(vertex, component)
}





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy