All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.scalding.CoGroupBuilder.scala Maven / Gradle / Ivy

There is a newer version: 0.7.3
Show newest version
/*
Copyright 2012 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.scalding

import cascading.pipe.{CoGroup, Every, Pipe}
import cascading.pipe.joiner.MixedJoin
import cascading.tuple.Fields

/**
 * Builder used internally to implement coGroups (joins).
 * It can also express more general joins, e.g. star joins, by registering
 * several pipes through coGroup before the builder is scheduled.
 *
 * @param groupFields the key fields of the left-hand pipe, i.e. the pipe handed to schedule
 * @param joinMode    the join mode applied to that left-hand pipe
 */
class CoGroupBuilder(groupFields : Fields, joinMode : JoinMode) extends GroupBuilder(groupFields) {
  // Every additional join side registered through coGroup, kept as
  // (key fields, pipe, join mode). New sides are prepended, so the most
  // recently registered side is at the head of the list.
  protected var coGroups : List[(Fields, Pipe, JoinMode)] = Nil

  /**
   * Joins (cogroups) with pipe p on fields f.
   * Make sure that pipe p is smaller than the left side pipe, otherwise this
   * might take a while.
   *
   * @param f the key fields of p to join on
   * @param p the pipe to join against; it is passed through RichPipe.assignName before being stored
   * @param j the join mode for p, InnerJoinMode when omitted
   * @return this builder, so further calls can be chained
   */
  def coGroup(f : Fields, p : Pipe, j : JoinMode = InnerJoinMode) : CoGroupBuilder = {
    val namedSide = (f, RichPipe.assignName(p), j)
    coGroups = namedSide :: coGroups
    this
  }

  // TODO: move the automatic renaming of fields here
  // and remove it from joinWithSmaller/joinWithTiny
  /**
   * Builds the Cascading CoGroup for the left-hand pipe plus every registered
   * side, then applies the pending Every operations on top of it.
   *
   * sortBy, overrideReducers and evs are not defined in this class; they are
   * presumably inherited from GroupBuilder -- confirm there for exact semantics.
   *
   * @param name not used by this implementation
   * @param pipe the left-hand pipe of the join, keyed by groupFields
   * @return the CoGroup pipe with all operations in evs folded over it
   */
  override def schedule(name : String, pipe : Pipe) : Pipe = {
    assert(!sortBy.isDefined, "cannot use a sortBy when doing a coGroup")
    assert(coGroups.nonEmpty, "coGroupBy requires at least one other pipe to .coGroup")
    // The left-hand pipe always leads; the registered sides follow in stored order.
    // The three arrays below are index-aligned because they are projections of one list.
    val sides = (groupFields, pipe, joinMode) :: coGroups
    val keyFields = sides.map { case (keys, _, _) => keys }.toArray
    val namedPipes = sides.map { case (_, side, _) => RichPipe.assignName(side) }.toArray
    // One flag per side: true for InnerJoinMode, false for OuterJoinMode.
    val innerFlags = sides.map { case (_, _, mode) => mode.booleanValue }.toArray
    // null is passed as the declared-fields argument of CoGroup.
    val joined : Pipe = new CoGroup(namedPipes, keyFields, null, new MixedJoin(innerFlags))
    // The return value is discarded, as in the original code.
    overrideReducers(joined)
    evs.foldRight(joined) { (applyEvery : Pipe => Every, downstream) => applyEvery(downstream) }
  }
}

/**
 * How one side of a coGroup takes part in the join.
 * The class is sealed, so the only modes are the ones declared in this file.
 */
sealed abstract class JoinMode {
  /** The flag for this mode: true for InnerJoinMode, false for OuterJoinMode. */
  def booleanValue : Boolean
}

/** Inner join mode; its booleanValue is true. */
case object InnerJoinMode extends JoinMode {
  override def booleanValue : Boolean = true
}

/** Outer join mode; its booleanValue is false. */
case object OuterJoinMode extends JoinMode {
  override def booleanValue : Boolean = false
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy