org.apache.flink.table.runtime.aggregate.ProcTimeBoundedRangeOver.scala (flink-table-planner_2.11)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.aggregate
import org.apache.flink.api.java.typeutils.RowTypeInfo
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.types.Row
import org.apache.flink.util.Collector
import org.apache.flink.api.common.state._
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ListTypeInfo
import java.util.{ArrayList, List => JList}
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.streaming.api.operators.TimestampedCollector
import org.apache.flink.table.api.StreamQueryConfig
import org.apache.flink.table.codegen.{Compiler, GeneratedAggregationsFunction}
import org.apache.flink.table.runtime.types.{CRow, CRowTypeInfo}
import org.apache.flink.table.util.Logging
/**
* Process function used to compute the aggregates of a bounded processing-time OVER RANGE
* window on a [[org.apache.flink.streaming.api.datastream.DataStream]].
*
* @param genAggregations Generated aggregate helper function
* @param precedingTimeBoundary Preceding processing-time boundary of the window, in milliseconds
* @param aggregatesTypeInfo row type info of aggregation
* @param inputType row type info of input row
* @param queryConfig query configuration that controls state retention
*/
class ProcTimeBoundedRangeOver[K](
genAggregations: GeneratedAggregationsFunction,
precedingTimeBoundary: Long,
aggregatesTypeInfo: RowTypeInfo,
inputType: TypeInformation[CRow],
queryConfig: StreamQueryConfig)
extends ProcessFunctionWithCleanupState[K, CRow, CRow](queryConfig)
with Compiler[GeneratedAggregations]
with Logging {
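// Keyed state and helpers used by this function (one instance per key of the OVER window):
// - `output`: reused CRow instance for emitting results
// - `accumulatorState`: the current accumulator values, carried across timer firings
// - `rowMapState`: buffered input rows, indexed by their processing time
// - `function`: the compiled instance of the generated aggregation code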
private var output: CRow = _
private var accumulatorState: ValueState[Row] = _
private var rowMapState: MapState[Long, JList[Row]] = _
private var function: GeneratedAggregations = _
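// Compiles and instantiates the generated aggregation class, creates the
// keyed state handles, and initializes the state-retention clean-up timer.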
override def open(config: Configuration): Unit = {
LOG.debug(s"Compiling AggregateHelper: ${genAggregations.name} \n\n" +
s"Code:\n${genAggregations.code}")
val clazz = compile(
getRuntimeContext.getUserCodeClassLoader,
genAggregations.name,
genAggregations.code)
LOG.debug("Instantiating AggregateHelper.")
function = clazz.newInstance()
function.open(getRuntimeContext)
output = new CRow(function.createOutputRow(), true)
// We keep the elements received in a MapState indexed based on their ingestion time
val rowListTypeInfo: TypeInformation[JList[Row]] =
new ListTypeInfo[Row](inputType.asInstanceOf[CRowTypeInfo].rowType)
.asInstanceOf[TypeInformation[JList[Row]]]
val mapStateDescriptor: MapStateDescriptor[Long, JList[Row]] =
new MapStateDescriptor[Long, JList[Row]]("rowmapstate",
BasicTypeInfo.LONG_TYPE_INFO.asInstanceOf[TypeInformation[Long]], rowListTypeInfo)
rowMapState = getRuntimeContext.getMapState(mapStateDescriptor)
val stateDescriptor: ValueStateDescriptor[Row] =
new ValueStateDescriptor[Row]("overState", aggregatesTypeInfo)
accumulatorState = getRuntimeContext.getState(stateDescriptor)
initCleanupTimeState("ProcTimeBoundedRangeOverCleanupTime")
}
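// Buffers the incoming row in rowMapState under the current processing time.
// The first row seen for a given millisecond registers a timer for
// currentTime + 1, so that all rows of that millisecond are aggregated
// together when the timer fires.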
override def processElement(
input: CRow,
ctx: KeyedProcessFunction[K, CRow, CRow]#Context,
out: Collector[CRow]): Unit = {
val currentTime = ctx.timerService.currentProcessingTime
// register state-cleanup timer
processCleanupTimer(ctx, currentTime)
// buffer the incoming event
// add current element to the window list of elements with corresponding timestamp
var rowList = rowMapState.get(currentTime)
// a null value means that this is the first event received for this timestamp
if (rowList == null) {
rowList = new ArrayList[Row]()
// register a timer to process the event once the current millisecond has passed
ctx.timerService.registerProcessingTimeTimer(currentTime + 1)
}
rowList.add(input.row)
rowMapState.put(currentTime, rowList)
}
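// Fires one millisecond after rows were buffered (or for a clean-up timer).
// Evaluates the window for timestamp - 1: retracts rows that dropped out of
// the preceding time range, accumulates the newly buffered rows, and emits
// one result per buffered row.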
override def onTimer(
timestamp: Long,
ctx: KeyedProcessFunction[K, CRow, CRow]#OnTimerContext,
out: Collector[CRow]): Unit = {
if (stateCleaningEnabled) {
val cleanupTime = cleanupTimeState.value()
if (null != cleanupTime && timestamp == cleanupTime) {
// clean up and return
cleanupState(rowMapState, accumulatorState)
function.cleanup()
return
}
}
// remove timestamp set outside of ProcessFunction.
out.asInstanceOf[TimestampedCollector[_]].eraseTimestamp()
// the elements to process registered this timer 1 ms ago,
// so their original timestamp is timestamp - 1
val currentTime = timestamp - 1
// get the list of elements of current proctime
val currentElements = rowMapState.get(currentTime)
// An expired clean-up timer may fire for a timestamp with no buffered
// elements, so check for null before processing.
if (null == currentElements) {
return
}
// initialize the accumulators
var accumulators = accumulatorState.value()
if (null == accumulators) {
accumulators = function.createAccumulators()
}
// update the elements to be removed and retract them from aggregators
val limit = currentTime - precedingTimeBoundary
// we iterate through all elements in the window buffer based on timestamp keys;
// when we find timestamps that fall outside the window, we retrieve the
// corresponding elements and retract them. Multiple elements could have been
// received at the same timestamp. The removal of old elements happens only
// once per proctime, as onTimer is called only once per registered timestamp
val iter = rowMapState.iterator
while (iter.hasNext) {
val entry = iter.next()
val elementKey = entry.getKey
if (elementKey < limit) {
// element key outside of window. Retract values
val elementsRemove = entry.getValue
var iRemove = 0
while (iRemove < elementsRemove.size()) {
val retractRow = elementsRemove.get(iRemove)
function.retract(accumulators, retractRow)
iRemove += 1
}
iter.remove()
}
}
// add the current elements to the aggregates. Multiple elements might
// have arrived within the same proctime; the same accumulator value
// is shared by all of them
var iElements = 0
while (iElements < currentElements.size()) {
val input = currentElements.get(iElements)
function.accumulate(accumulators, input)
iElements += 1
}
// build and emit the output for every event received at this proctime
iElements = 0
while (iElements < currentElements.size()) {
val input = currentElements.get(iElements)
// copy the forwarded fields of the current event into the output row
function.setForwardedFields(input, output.row)
// set the accumulator values on the result
function.setAggregationResults(accumulators, output.row)
out.collect(output)
iElements += 1
}
// update the value of accumulators for future incremental computation
accumulatorState.update(accumulators)
}
override def close(): Unit = {
function.close()
}
}
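// Usage sketch (not part of the original file): the Flink planner wires this
// function into a keyed stream of CRows roughly as below. The identifiers
// `keyedStream`, `genFunction`, `aggTypes`, `inputCRowType`, `qConfig` and
// `returnTypeInfo` are hypothetical placeholders for values the planner
// derives from the SQL query:
//
//   val result = keyedStream
//     .process(
//       new ProcTimeBoundedRangeOver[Row](
//         genFunction,          // generated aggregation helper
//         10 * 60 * 1000L,      // e.g. RANGE BETWEEN INTERVAL '10' MINUTE PRECEDING
//         aggTypes,
//         inputCRowType,
//         qConfig))
//     .returns(returnTypeInfo)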