All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.api.scala.UnfinishedCoGroupOperation.scala Maven / Gradle / Ivy

There is a newer version: 0.10.2-hadoop1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.scala

import org.apache.flink.api.common.ExecutionConfig
import org.apache.flink.api.common.functions.CoGroupFunction
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.api.java.operators._
import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo
import org.apache.flink.api.scala.typeutils.{CaseClassSerializer, CaseClassTypeInfo}
import org.apache.flink.util.Collector
import scala.collection.JavaConverters._
import scala.reflect.ClassTag


/**
 * An unfinished coGroup operation that results from [[DataSet.coGroup]] The keys for the left and
 * right side must be specified using first `where` and then `isEqualTo`. For example:
 *
 * {{{
 *   val left = ...
 *   val right = ...
 *   val coGroupResult = left.coGroup(right).where(...).isEqualTo(...)
 * }}}
 * @tparam L The type of the left input of the coGroup.
 * @tparam R The type of the right input of the coGroup.
 */
class UnfinishedCoGroupOperation[L: ClassTag, R: ClassTag](
                                                            leftInput: DataSet[L],
                                                            rightInput: DataSet[R])
  extends UnfinishedKeyPairOperation[L, R, CoGroupDataSet[L, R]](leftInput, rightInput) {

  private[flink] def finish(leftKey: Keys[L], rightKey: Keys[R]) = {
    val coGrouper = new CoGroupFunction[L, R, (Array[L], Array[R])] {
      def coGroup(
                   left: java.lang.Iterable[L],
                   right: java.lang.Iterable[R],
                   out: Collector[(Array[L], Array[R])]) = {
        val leftResult = Array[Any](left.asScala.toSeq: _*).asInstanceOf[Array[L]]
        val rightResult = Array[Any](right.asScala.toSeq: _*).asInstanceOf[Array[R]]

        out.collect((leftResult, rightResult))
      }
    }

    // We have to use this hack, for some reason classOf[Array[T]] does not work.
    // Maybe because ObjectArrayTypeInfo does not accept the Scala Array as an array class.
    val leftArrayType =
      ObjectArrayTypeInfo.getInfoFor(new Array[L](0).getClass, leftInput.getType)
    val rightArrayType =
      ObjectArrayTypeInfo.getInfoFor(new Array[R](0).getClass, rightInput.getType)

    val returnType = new CaseClassTypeInfo[(Array[L], Array[R])](
      classOf[(Array[L], Array[R])],
      Array(leftArrayType, rightArrayType),
      Seq(leftArrayType, rightArrayType),
      Array("_1", "_2")) {

      override def createSerializer(
          executionConfig: ExecutionConfig): TypeSerializer[(Array[L], Array[R])] = {
        val fieldSerializers: Array[TypeSerializer[_]] = new Array[TypeSerializer[_]](getArity)
        for (i <- 0 until getArity) {
          fieldSerializers(i) = types(i).createSerializer(executionConfig)
        }

        new CaseClassSerializer[(Array[L], Array[R])](
          classOf[(Array[L], Array[R])],
          fieldSerializers) {
          override def createInstance(fields: Array[AnyRef]) = {
            (fields(0).asInstanceOf[Array[L]], fields(1).asInstanceOf[Array[R]])
          }
        }
      }
    }
    val coGroupOperator = new CoGroupOperator[L, R, (Array[L], Array[R])](
      leftInput.javaSet, rightInput.javaSet, leftKey, rightKey, coGrouper, returnType,
      null, // partitioner
      getCallLocationName())

    new CoGroupDataSet(coGroupOperator, leftInput, rightInput, leftKey, rightKey)
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy