All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.table.runtime.aggregate.GroupAggFunction.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.table.runtime.aggregate

import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.runtime.state.keyed.KeyedValueState
import org.apache.flink.table.api.TableConfig
import org.apache.flink.table.api.types.{DataTypes, InternalType, TypeConverters}
import org.apache.flink.table.codegen.{EqualiserCodeGenerator, GeneratedAggsHandleFunction}
import org.apache.flink.table.dataformat.{BaseRow, JoinedRow}
import org.apache.flink.table.runtime.sort.RecordEqualiser
import org.apache.flink.table.runtime.functions.{AggsHandleFunction, ExecutionContext, ProcessFunction}
import org.apache.flink.table.typeutils.{BaseRowTypeInfo, TypeUtils}
import org.apache.flink.table.util.Logging
import org.apache.flink.table.dataformat.util.BaseRowUtil.isAccumulateMsg
import org.apache.flink.table.dataformat.util.{BaseRowUtil, BinaryRowUtil}
import org.apache.flink.util.Collector

/**
  * Class of Aggregate Function used for the groupby (without window) aggregate
  * @param genAggsHandler  the generated aggregate handler
  * @param accTypes        the accumulator types
  * @param aggValueTypes   the aggregate value types
  * @param inputCountIndex None when the input not contains retraction.
  *                        Some when input contains retraction and
  *                        the index represents the count1 agg index.
  * @param generateRetraction whether this operator will generate retraction
  */
class GroupAggFunction(
    genAggsHandler: GeneratedAggsHandleFunction,
    accTypes: Array[InternalType],
    aggValueTypes: Array[InternalType],
    inputCountIndex: Option[Int],
    generateRetraction: Boolean,
    groupWithoutKey: Boolean,
    tableConfig: TableConfig)
  extends ProcessFunctionWithCleanupState[BaseRow, BaseRow](tableConfig)
  with Logging {

  protected var function: AggsHandleFunction = _

  // stores the accumulators
  protected var accState: KeyedValueState[BaseRow, BaseRow] = _

  private val inputCounter: InputCounter = InputCounter(inputCountIndex)

  protected var newAggValue: BaseRow = _
  protected var prevAggValue: BaseRow = _
  protected var firstRow: Boolean = _

  protected var resultRow: JoinedRow = _

  @transient
  private var equaliser: RecordEqualiser = _

  override def open(ctx: ExecutionContext): Unit = {
    super.open(ctx)
    LOG.debug(s"Compiling AggsHandleFunction: ${genAggsHandler.name} \n\n " +
      s"Code:\n${genAggsHandler.code}")
    function = genAggsHandler.newInstance(ctx.getRuntimeContext.getUserCodeClassLoader)
    function.open(ctx)

    // serialize as GenericRow, deserialize as BinaryRow
    val accTypeInfo = new BaseRowTypeInfo(
      accTypes.map(TypeConverters.createExternalTypeInfoFromDataType): _*)
    val accDesc = new ValueStateDescriptor("accState", accTypeInfo)
    accState = ctx.getKeyedValueState(accDesc)

    val generator = new EqualiserCodeGenerator(aggValueTypes)
    val generatedEqualiser = generator.generateRecordEqualiser("GroupAggValueEqualiser")
    equaliser = generatedEqualiser.newInstance(ctx.getRuntimeContext.getUserCodeClassLoader)

    initCleanupTimeState("GroupAggregateCleanupTime")

    resultRow = new JoinedRow()
  }

  override def processElement(
      input: BaseRow,
      ctx: ProcessFunction.Context,
      out: Collector[BaseRow]): Unit = {
    val currentTime = ctx.timerService().currentProcessingTime()
    // register state-cleanup timer
    registerProcessingCleanupTimer(ctx, currentTime)

    val currentKey = executionContext.currentKey()

    var accumulators = accState.get(currentKey)
    if (null == accumulators) {
      firstRow = true
      accumulators = function.createAccumulators()
    } else {
      firstRow = false
    }

    // set accumulators to handler first
    function.setAccumulators(accumulators)
    // get previous aggregate result
    prevAggValue = function.getValue

    // update aggregate result and set to the newRow
    if (isAccumulateMsg(input)) {
      // accumulate input
      function.accumulate(input)
    } else {
      // retract input
      function.retract(input)
    }
    // get current aggregate result
    newAggValue = function.getValue

    // get accumulator
    accumulators = function.getAccumulators

    if (!inputCounter.countIsZero(accumulators)) {
      // we aggregated at least one record for this key

      // update the state
      accState.put(currentKey, accumulators)

      // if this was not the first row and we have to emit retractions
      if (!firstRow) {
        if (!stateCleaningEnabled && equaliser.equalsWithoutHeader(prevAggValue, newAggValue)) {
          // newRow is the same as before and state cleaning is not enabled.
          // We do not emit retraction and acc message.
          // If state cleaning is enabled, we have to emit messages to prevent too early
          // state eviction of downstream operators.
          return
        } else {
          // retract previous result
          if (generateRetraction) {
            out.collect(prevResultRow(currentKey))
          }
        }
      }
      // emit the new result
      out.collect(newResultRow(currentKey))

    } else {
      // we retracted the last record for this key
      // sent out a delete message
      if (!firstRow) {
        out.collect(prevResultRow(currentKey))
      }
      // and clear all state
      accState.remove(currentKey)
    }
  }

  protected def newResultRow(key: BaseRow): BaseRow = {
    resultRow.replace(key, newAggValue)
    BaseRowUtil.setAccumulate(resultRow)
    resultRow
  }

  protected def prevResultRow(key: BaseRow): BaseRow = {
    resultRow.replace(key, prevAggValue)
    BaseRowUtil.setRetract(resultRow)
    resultRow
  }

  override def onTimer(
      timestamp: Long,
      ctx: ProcessFunction.OnTimerContext,
      out: Collector[BaseRow]): Unit = {
    if (needToCleanupState(timestamp)) {
      cleanupState(accState)
      function.cleanup()
    }
  }

  override def endInput(out: Collector[BaseRow]): Unit = {
    // output default value if grouping without key and it's an empty group
    if (groupWithoutKey) {
      executionContext.setCurrentKey(BinaryRowUtil.EMPTY_ROW)
      val accumulators = accState.get(BinaryRowUtil.EMPTY_ROW)
      if (inputCounter.countIsZero(accumulators)) {
        function.setAccumulators(function.createAccumulators)
        newAggValue = function.getValue
        out.collect(newResultRow(BinaryRowUtil.EMPTY_ROW))
      }
      // no need to update acc state, because this is the end
    }
  }

  override def close(): Unit = {
    if (function != null) {
      function.close()
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy