All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.api.scala.analysis.GlobalSchemaGenerator.scala Maven / Gradle / Ivy

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.api.scala.analysis

import java.util.{List => JList}
import eu.stratosphere.api.common.operators.Operator
import eu.stratosphere.api.scala._
import eu.stratosphere.api.java.record.operators._
import eu.stratosphere.api.common.operators.base.{BulkIterationBase => BulkIteration, DeltaIterationBase => DeltaIteration, GenericDataSourceBase, MapOperatorBase}
import eu.stratosphere.api.common.operators.Union
import eu.stratosphere.types.Record
import scala.Some
import scala.Some


class GlobalSchemaGenerator {

  def initGlobalSchema(sinks: Seq[Operator[Record] with ScalaOperator[_, _]]): Unit = {
    // don't do anything, we don't need global positions if we don't do reordering of operators
    // FieldSet.toSerializerIndexArray returns local positions and ignores global positions
    // sinks.foldLeft(0) { (freePos, contract) => globalizeContract(contract, Seq(), Map(), None, freePos) }
  }

  /**
   * Computes disjoint write sets for a contract and its inputs.
   *
   * @param contract The contract to globalize
   * @param parentInputs Input fields which should be bound to the contract's outputs
   * @param proxies Provides contracts for iteration placeholders
   * @param fixedOutputs Specifies required positions for the contract's output fields, or None to allocate new positions
   * @param freePos The current first available position in the global schema
   * @return The new first available position in the global schema
   */
  private def globalizeContract(contract: Operator[Record], parentInputs: Seq[FieldSet[InputField]], proxies: Map[Operator[Record], Operator[Record] with ScalaOperator[_, _]], fixedOutputs: Option[FieldSet[Field]], freePos: Int): Int = {

    val contract4s = proxies.getOrElse(contract, contract.asInstanceOf[Operator[Record] with ScalaOperator[_, _]])

    parentInputs.foreach(contract4s.getUDF.attachOutputsToInputs)

    contract4s.getUDF.outputFields.isGlobalized match {

      case true => freePos

      case false => {

        val freePos1 = globalizeContract(contract4s, proxies, fixedOutputs, freePos)

        contract4s.persistConfiguration(None)

        freePos1
      }
    }
  }

  private def globalizeContract(contract: Operator[_] with ScalaOperator[_, _], proxies: Map[Operator[Record], Operator[Record] with ScalaOperator[_, _]], fixedOutputs: Option[FieldSet[Field]], freePos: Int): Int = {

    contract match {

      case contract : FileDataSink with ScalaOutputOperator[_] => {
        contract.getUDF.outputFields.setGlobalized()
        globalizeContract(contract.getInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.asInstanceOf[UDF1[_,_]].inputFields), proxies, None, freePos)
      }

      case contract: GenericDataSourceBase[_, _] with ScalaOperator[_, _] => {
        contract.getUDF.setOutputGlobalIndexes(freePos, fixedOutputs)
      }

      case contract : BulkIteration[_] with BulkIterationScalaOperator[_] => {
        val s0 = contract.getInput().asInstanceOf[Operator[Record]]

        val s0contract = proxies.getOrElse(s0, s0.asInstanceOf[Operator[Record] with ScalaOperator[_, Record]])
        val newProxies = proxies + (contract.getPartialSolution().asInstanceOf[Operator[Record]] -> s0contract)

        val freePos1 = globalizeContract(s0, Seq(), proxies, fixedOutputs, freePos)
        val freePos2 = globalizeContract(contract.getNextPartialSolution().asInstanceOf[Operator[Record]], Seq(), newProxies, Some(s0contract.getUDF.outputFields), freePos1)
        val freePos3 = Option(contract.getTerminationCriterion().asInstanceOf[Operator[Record]]) map { globalizeContract(_, Seq(), newProxies, None, freePos2) } getOrElse freePos2

        contract.getUDF.assignOutputGlobalIndexes(s0contract.getUDF.outputFields)

        freePos3
      }

      case contract : DeltaIteration[_, _] with DeltaIterationScalaOperator[_] => {
//      case contract @ WorksetIterate4sContract(s0, ws0, deltaS, newWS, placeholderS, placeholderWS) => {
        val s0 = contract.getInitialSolutionSet().asInstanceOf[Operator[Record]]
        val ws0 = contract.getInitialWorkset().asInstanceOf[Operator[Record]]
        val deltaS = contract.getSolutionSetDelta.asInstanceOf[Operator[Record]]
        val newWS = contract.getNextWorkset.asInstanceOf[Operator[Record]]

        val s0contract = proxies.getOrElse(s0, s0.asInstanceOf[Operator[Record] with ScalaOperator[_, _]])
        val ws0contract = proxies.getOrElse(ws0, ws0.asInstanceOf[Operator[Record] with ScalaOperator[_, _]])
        val newProxies = proxies + (contract.getSolutionSetDelta.asInstanceOf[Operator[Record]] -> s0contract) + (contract.getNextWorkset.asInstanceOf[Operator[Record]] -> ws0contract)

        val freePos1 = globalizeContract(s0, Seq(contract.key.inputFields), proxies, fixedOutputs, freePos)
        val freePos2 = globalizeContract(ws0, Seq(), proxies, None, freePos1)
        val freePos3 = globalizeContract(deltaS, Seq(), newProxies, Some(s0contract.getUDF.outputFields), freePos2)
        val freePos4 = globalizeContract(newWS, Seq(), newProxies, Some(ws0contract.getUDF.outputFields), freePos3)

        contract.getUDF.assignOutputGlobalIndexes(s0contract.getUDF.outputFields)

        freePos4
      }

      case contract : CoGroupOperator with TwoInputKeyedScalaOperator[_, _, _] => {

        val freePos1 = globalizeContract(contract.getFirstInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.leftInputFields, contract.leftKey.inputFields), proxies, None, freePos)
        val freePos2 = globalizeContract(contract.getSecondInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.rightInputFields, contract.rightKey.inputFields), proxies, None, freePos1)

        contract.getUDF.setOutputGlobalIndexes(freePos2, fixedOutputs)
      }

      case contract: CrossOperator with TwoInputScalaOperator[_, _, _] => {

        val freePos1 = globalizeContract(contract.getFirstInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.leftInputFields), proxies, None, freePos)
        val freePos2 = globalizeContract(contract.getSecondInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.rightInputFields), proxies, None, freePos1)

        contract.getUDF.setOutputGlobalIndexes(freePos2, fixedOutputs)
      }

      case contract : JoinOperator with TwoInputKeyedScalaOperator[_, _, _] => {

        val freePos1 = globalizeContract(contract.getFirstInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.leftInputFields, contract.leftKey.inputFields), proxies, None, freePos)
        val freePos2 = globalizeContract(contract.getSecondInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.rightInputFields, contract.rightKey.inputFields), proxies, None, freePos1)

        contract.getUDF.setOutputGlobalIndexes(freePos2, fixedOutputs)
      }

      case contract : MapOperatorBase[_, _, _] with OneInputScalaOperator[_, _] => {

        val freePos1 = globalizeContract(contract.getInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.inputFields), proxies, None, freePos)

        contract.getUDF.setOutputGlobalIndexes(freePos1, fixedOutputs)
      }

      case contract : ReduceOperator with OneInputKeyedScalaOperator[_, _] => {

        val freePos1 = globalizeContract(contract.getInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.inputFields, contract.key.inputFields), proxies, None, freePos)

        contract.getUDF.setOutputGlobalIndexes(freePos1, fixedOutputs)
      }

      // for key-less (global) reducers
      case contract : ReduceOperator with OneInputScalaOperator[_, _] => {

        val freePos1 = globalizeContract(contract.getInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.inputFields), proxies, None, freePos)

        contract.getUDF.setOutputGlobalIndexes(freePos1, fixedOutputs)
      }

      case contract : Union[_] with UnionScalaOperator[_] => {
        
        val freePos1 = globalizeContract(contract.getFirstInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.leftInputFields), proxies, fixedOutputs, freePos)
        val freePos2 = globalizeContract(contract.getSecondInput().asInstanceOf[Operator[Record]], Seq(contract.getUDF.rightInputFields), proxies, fixedOutputs, freePos1)

        contract.getUDF.setOutputGlobalIndexes(freePos2, fixedOutputs)
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy