org.apache.flink.table.runtime.aggregate.ProcTimeBoundedRowsOver.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of flink-table-planner_2.11 Show documentation
This module bridges Table/SQL API and runtime. It contains all resources that are required during pre-flight and runtime phase.
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.table.runtime.aggregate

import java.util
import java.util.{List => JList}

import org.apache.flink.api.common.state.{MapState, MapStateDescriptor, ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation}
import org.apache.flink.api.java.typeutils.{ListTypeInfo, RowTypeInfo}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.table.api.StreamQueryConfig
import org.apache.flink.table.codegen.{Compiler, GeneratedAggregationsFunction}
import org.apache.flink.table.runtime.types.{CRow, CRowTypeInfo}
import org.apache.flink.table.util.Logging
import org.apache.flink.types.Row
import org.apache.flink.util.{Collector, Preconditions}

/**
  * Process Function for ROW clause processing-time bounded OVER window.
  *
  * Incoming rows are buffered per key in a map state keyed by the processing time at which
  * they were received. While fewer than `precedingOffset` rows are buffered, every row is
  * simply accumulated. Once the buffer is full, the oldest buffered row is retracted from the
  * accumulators before the new row is accumulated, so the aggregate always covers at most
  * `precedingOffset` rows (including the current one).
  *
  * @param genAggregations      Generated aggregate helper function
  * @param precedingOffset      number of rows covered by the window; must be greater than 0
  * @param aggregatesTypeInfo   row type info of aggregation
  * @param inputType            row type info of input row
  * @param queryConfig          query configuration handed to the cleanup-state base class
  *                             (presumably carries the idle-state retention settings)
  * @tparam K                   type of the key of the keyed stream
  */
class ProcTimeBoundedRowsOver[K](
    genAggregations: GeneratedAggregationsFunction,
    precedingOffset: Long,
    aggregatesTypeInfo: RowTypeInfo,
    inputType: TypeInformation[CRow],
    queryConfig: StreamQueryConfig)
  extends ProcessFunctionWithCleanupState[K, CRow, CRow](queryConfig)
    with Compiler[GeneratedAggregations]
    with Logging {

  Preconditions.checkArgument(precedingOffset > 0)

  // accumulators of the rows currently inside the window
  private var accumulatorState: ValueState[Row] = _
  // buffered rows, grouped by the processing time at which they arrived
  private var rowMapState: MapState[Long, JList[Row]] = _
  // reused output record
  private var output: CRow = _
  // number of buffered rows; only grows until it reaches precedingOffset
  private var counterState: ValueState[Long] = _
  // key of rowMapState that holds the oldest buffered row (0 means "not set yet")
  private var smallestTsState: ValueState[Long] = _

  private var function: GeneratedAggregations = _

  /**
    * Compiles and instantiates the generated aggregation helper and registers all keyed state.
    *
    * @param config configuration passed in by the runtime (not read here)
    */
  override def open(config: Configuration): Unit = {
    LOG.debug(s"Compiling AggregateHelper: ${genAggregations.name} \n\n" +
                s"Code:\n${genAggregations.code}")
    val clazz = compile(
      getRuntimeContext.getUserCodeClassLoader,
      genAggregations.name,
      genAggregations.code)
    LOG.debug("Instantiating AggregateHelper.")
    function = clazz.newInstance()
    function.open(getRuntimeContext)

    output = new CRow(function.createOutputRow(), true)

    // We keep the elements received in a Map state keyed
    // by the ingestion time in the operator.
    // We also keep a counter of processed elements
    // and the timestamp of the oldest element.
    val rowListTypeInfo: TypeInformation[JList[Row]] =
      new ListTypeInfo[Row](inputType.asInstanceOf[CRowTypeInfo].rowType)
        .asInstanceOf[TypeInformation[JList[Row]]]

    val mapStateDescriptor: MapStateDescriptor[Long, JList[Row]] =
      new MapStateDescriptor[Long, JList[Row]]("windowBufferMapState",
        BasicTypeInfo.LONG_TYPE_INFO.asInstanceOf[TypeInformation[Long]], rowListTypeInfo)
    rowMapState = getRuntimeContext.getMapState(mapStateDescriptor)

    val aggregationStateDescriptor: ValueStateDescriptor[Row] =
      new ValueStateDescriptor[Row]("aggregationState", aggregatesTypeInfo)
    accumulatorState = getRuntimeContext.getState(aggregationStateDescriptor)

    val processedCountDescriptor: ValueStateDescriptor[Long] =
      new ValueStateDescriptor[Long]("processedCountState", classOf[Long])
    counterState = getRuntimeContext.getState(processedCountDescriptor)

    val smallestTimestampDescriptor: ValueStateDescriptor[Long] =
      new ValueStateDescriptor[Long]("smallestTSState", classOf[Long])
    smallestTsState = getRuntimeContext.getState(smallestTimestampDescriptor)

    initCleanupTimeState("ProcTimeBoundedRowsOverCleanupTime")
  }

  /**
    * Accumulates the incoming row, retracting the oldest buffered row first when the window
    * is already full, and emits the forwarded fields together with the aggregation results.
    *
    * @param inputC the incoming record
    * @param ctx    context giving access to the timer service
    * @param out    collector the result row is emitted to
    */
  override def processElement(
    inputC: CRow,
    ctx: KeyedProcessFunction[K, CRow, CRow]#Context,
    out: Collector[CRow]): Unit = {

    val input = inputC.row

    val currentTime = ctx.timerService.currentProcessingTime

    // register state-cleanup timer
    processCleanupTimer(ctx, currentTime)

    // initialize state for the processed element
    var accumulators = accumulatorState.value
    if (accumulators == null) {
      accumulators = function.createAccumulators()
    }

    // get smallest timestamp; an unset Long state reads as 0
    var smallestTs = smallestTsState.value
    if (smallestTs == 0L) {
      smallestTs = currentTime
      smallestTsState.update(smallestTs)
    }

    // get previous counter value
    val counter = counterState.value
    if (counter == precedingOffset) {
      // window is full: drop the oldest row before adding the new one
      retractOldestRow(accumulators, smallestTs, currentTime)
    } else {
      // we update the counter only while the buffer is getting filled
      counterState.update(counter + 1)
    }

    // copy forwarded fields in output row
    function.setForwardedFields(input, output.row)

    // accumulate current row and set aggregate in output row
    function.accumulate(accumulators, input)
    function.setAggregationResults(accumulators, output.row)

    // update map state and accumulator state
    bufferRow(currentTime, input)
    accumulatorState.update(accumulators)

    out.collect(output)
  }

  /**
    * Retracts the oldest buffered row from `accumulators`, removes it from the buffer and
    * updates the smallest-timestamp state if the list of that timestamp became empty.
    *
    * @param accumulators accumulators to retract the row from
    * @param smallestTs   key of the buffer entry holding the oldest row
    * @param currentTime  processing time under which the current row is buffered afterwards
    */
  private def retractOldestRow(accumulators: Row, smallestTs: Long, currentTime: Long): Unit = {
    val retractList = rowMapState.get(smallestTs)

    // get oldest element beyond buffer size and retract its value
    val retractRow = retractList.get(0)
    function.retract(accumulators, retractRow)
    retractList.remove(0)

    if (!retractList.isEmpty) {
      // rows with the same timestamp remain, keep the shortened list
      rowMapState.put(smallestTs, retractList)
    } else {
      // the list of the smallest timestamp is exhausted: remove it and find the new smallest
      rowMapState.remove(smallestTs)
      val iter = rowMapState.keys.iterator
      val newSmallestTs: Long =
        if (!iter.hasNext) {
          // The buffer is empty now (happens when precedingOffset is 1). The current row is
          // buffered under currentTime right after this call, so that is the new smallest key.
          // Storing a placeholder such as Long.MaxValue here would make the next lookup
          // return null and fail when the oldest row is retracted.
          currentTime
        } else {
          var minTs: Long = iter.next
          while (iter.hasNext) {
            val ts: Long = iter.next
            if (ts < minTs) {
              minTs = ts
            }
          }
          minTs
        }
      smallestTsState.update(newSmallestTs)
    }
  }

  /**
    * Appends `row` to the buffer entry of `timestamp`, creating the entry if it is missing.
    *
    * @param timestamp processing time used as buffer key
    * @param row       row to buffer
    */
  private def bufferRow(timestamp: Long, row: Row): Unit = {
    val existing = rowMapState.get(timestamp)
    if (existing != null) {
      existing.add(row)
      rowMapState.put(timestamp, existing)
    } else {
      // first row seen for this timestamp
      val newList = new util.ArrayList[Row]
      newList.add(row)
      rowMapState.put(timestamp, newList)
    }
  }

  /**
    * Clears all keyed state of this function and lets the aggregation helper clean up,
    * provided state cleaning is enabled.
    *
    * @param timestamp timestamp of the firing timer
    * @param ctx       timer context
    * @param out       collector (unused, nothing is emitted on cleanup)
    */
  override def onTimer(
    timestamp: Long,
    ctx: KeyedProcessFunction[K, CRow, CRow]#OnTimerContext,
    out: Collector[CRow]): Unit = {

    if (stateCleaningEnabled) {
      cleanupState(rowMapState, accumulatorState, counterState, smallestTsState)
      function.cleanup()
    }
  }

  /** Closes the generated aggregation helper. */
  override def close(): Unit = {
    // function is still null if open() failed before the helper was instantiated
    if (function != null) {
      function.close()
    }
  }
}