All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.table.planner.plan.nodes.exec.stream.StreamExecGroupWindowAggregate Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.planner.plan.nodes.exec.stream;

import org.apache.flink.api.dag.Transformation;
import org.apache.flink.streaming.api.transformations.OneInputTransformation;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.expressions.FieldReferenceExpression;
import org.apache.flink.table.expressions.ValueLiteralExpression;
import org.apache.flink.table.planner.calcite.FlinkTypeFactory;
import org.apache.flink.table.planner.codegen.CodeGeneratorContext;
import org.apache.flink.table.planner.codegen.EqualiserCodeGenerator;
import org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator;
import org.apache.flink.table.planner.delegation.PlannerBase;
import org.apache.flink.table.planner.expressions.PlannerNamedWindowProperty;
import org.apache.flink.table.planner.expressions.PlannerWindowProperty;
import org.apache.flink.table.planner.plan.logical.LogicalWindow;
import org.apache.flink.table.planner.plan.logical.SessionGroupWindow;
import org.apache.flink.table.planner.plan.logical.SlidingGroupWindow;
import org.apache.flink.table.planner.plan.logical.TumblingGroupWindow;
import org.apache.flink.table.planner.plan.nodes.exec.ExecEdge;
import org.apache.flink.table.planner.plan.nodes.exec.ExecNode;
import org.apache.flink.table.planner.plan.nodes.exec.InputProperty;
import org.apache.flink.table.planner.plan.nodes.exec.serde.LogicalWindowJsonDeserializer;
import org.apache.flink.table.planner.plan.nodes.exec.serde.LogicalWindowJsonSerializer;
import org.apache.flink.table.planner.plan.utils.AggregateInfoList;
import org.apache.flink.table.planner.plan.utils.KeySelectorUtil;
import org.apache.flink.table.planner.plan.utils.WindowEmitStrategy;
import org.apache.flink.table.planner.utils.JavaScalaConversionUtil;
import org.apache.flink.table.runtime.generated.GeneratedClass;
import org.apache.flink.table.runtime.generated.GeneratedNamespaceAggsHandleFunction;
import org.apache.flink.table.runtime.generated.GeneratedNamespaceTableAggsHandleFunction;
import org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser;
import org.apache.flink.table.runtime.keyselector.RowDataKeySelector;
import org.apache.flink.table.runtime.operators.window.CountWindow;
import org.apache.flink.table.runtime.operators.window.TimeWindow;
import org.apache.flink.table.runtime.operators.window.WindowOperator;
import org.apache.flink.table.runtime.operators.window.WindowOperatorBuilder;
import org.apache.flink.table.runtime.types.LogicalTypeDataTypeConverter;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.runtime.util.TimeWindowUtil;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.annotation.JsonSerialize;

import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.tools.RelBuilder;
import org.apache.commons.lang3.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.flink.table.planner.plan.utils.AggregateUtil.hasRowIntervalType;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.hasTimeIntervalType;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.isProctimeAttribute;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.isRowtimeAttribute;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.isTableAggregate;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.timeFieldIndex;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.toDuration;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.toLong;
import static org.apache.flink.table.planner.plan.utils.AggregateUtil.transformToStreamAggregateInfoList;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Stream {@link ExecNode} for either group window aggregate or group window table aggregate.
 *
 * 

The differences between {@link StreamExecWindowAggregate} and {@link * StreamExecGroupWindowAggregate} is that, this node is translated from window TVF syntax, but the * * other is from the legacy GROUP WINDOW FUNCTION syntax. In the long future, {@link * StreamExecGroupWindowAggregate} will be dropped. */ @JsonIgnoreProperties(ignoreUnknown = true) public class StreamExecGroupWindowAggregate extends StreamExecAggregateBase { public static final String FIELD_NAME_WINDOW = "window"; public static final String FIELD_NAME_NAMED_WINDOW_PROPERTIES = "namedWindowProperties"; private static final Logger LOGGER = LoggerFactory.getLogger(StreamExecGroupWindowAggregate.class); @JsonProperty(FIELD_NAME_GROUPING) private final int[] grouping; @JsonProperty(FIELD_NAME_AGG_CALLS) private final AggregateCall[] aggCalls; @JsonProperty(FIELD_NAME_WINDOW) @JsonSerialize(using = LogicalWindowJsonSerializer.class) @JsonDeserialize(using = LogicalWindowJsonDeserializer.class) private final LogicalWindow window; @JsonProperty(FIELD_NAME_NAMED_WINDOW_PROPERTIES) private final PlannerNamedWindowProperty[] namedWindowProperties; @JsonProperty(FIELD_NAME_NEED_RETRACTION) private final boolean needRetraction; public StreamExecGroupWindowAggregate( int[] grouping, AggregateCall[] aggCalls, LogicalWindow window, PlannerNamedWindowProperty[] namedWindowProperties, boolean needRetraction, InputProperty inputProperty, RowType outputType, String description) { this( grouping, aggCalls, window, namedWindowProperties, needRetraction, getNewNodeId(), Collections.singletonList(inputProperty), outputType, description); } @JsonCreator public StreamExecGroupWindowAggregate( @JsonProperty(FIELD_NAME_GROUPING) int[] grouping, @JsonProperty(FIELD_NAME_AGG_CALLS) AggregateCall[] aggCalls, @JsonProperty(FIELD_NAME_WINDOW) LogicalWindow window, @JsonProperty(FIELD_NAME_NAMED_WINDOW_PROPERTIES) PlannerNamedWindowProperty[] namedWindowProperties, @JsonProperty(FIELD_NAME_NEED_RETRACTION) boolean needRetraction, @JsonProperty(FIELD_NAME_ID) int id, @JsonProperty(FIELD_NAME_INPUT_PROPERTIES) List inputProperties, @JsonProperty(FIELD_NAME_OUTPUT_TYPE) RowType outputType, @JsonProperty(FIELD_NAME_DESCRIPTION) String description) { super(id, inputProperties, outputType, description); checkArgument(inputProperties.size() == 1); this.grouping = checkNotNull(grouping); this.aggCalls = checkNotNull(aggCalls); this.window = checkNotNull(window); this.namedWindowProperties = checkNotNull(namedWindowProperties); this.needRetraction = needRetraction; } @SuppressWarnings("unchecked") @Override protected Transformation translateToPlanInternal(PlannerBase planner) { final boolean isCountWindow; if (window instanceof TumblingGroupWindow) { isCountWindow = hasRowIntervalType(((TumblingGroupWindow) window).size()); } else if (window instanceof SlidingGroupWindow) { isCountWindow = hasRowIntervalType(((SlidingGroupWindow) window).size()); } else { isCountWindow = false; } final TableConfig config = planner.getTableConfig(); if (isCountWindow && grouping.length > 0 && config.getMinIdleStateRetentionTime() < 0) { LOGGER.warn( "No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval to prevent " + "excessive state size. You may specify a retention time of 0 to not clean up the state."); } final ExecEdge inputEdge = getInputEdges().get(0); final Transformation inputTransform = (Transformation) inputEdge.translateToPlan(planner); final RowType inputRowType = (RowType) inputEdge.getOutputType(); final int inputTimeFieldIndex; if (isRowtimeAttribute(window.timeAttribute())) { inputTimeFieldIndex = timeFieldIndex( FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType), planner.getRelBuilder(), window.timeAttribute()); if (inputTimeFieldIndex < 0) { throw new TableException( "Group window must defined on a time attribute, " + "but the time attribute can't be found.\n" + "This should never happen. Please file an issue."); } } else { inputTimeFieldIndex = -1; } final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone( window.timeAttribute().getOutputDataType().getLogicalType(), config); final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length]; Arrays.fill(aggCallNeedRetractions, needRetraction); final AggregateInfoList aggInfoList = transformToStreamAggregateInfoList( inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, true, // isStateBackendDataViews true); // needDistinctInfo final GeneratedClass aggCodeGenerator = createAggsHandler( aggInfoList, config, planner.getRelBuilder(), inputRowType.getChildren(), shiftTimeZone); final LogicalType[] aggResultTypes = extractLogicalTypes(aggInfoList.getActualValueTypes()); final LogicalType[] windowPropertyTypes = Arrays.stream(namedWindowProperties) .map(p -> p.getProperty().getResultType()) .toArray(LogicalType[]::new); final EqualiserCodeGenerator generator = new EqualiserCodeGenerator(ArrayUtils.addAll(aggResultTypes, windowPropertyTypes)); final GeneratedRecordEqualiser equaliser = generator.generateRecordEqualiser("WindowValueEqualiser"); final LogicalType[] aggValueTypes = extractLogicalTypes(aggInfoList.getActualValueTypes()); final LogicalType[] accTypes = extractLogicalTypes(aggInfoList.getAccTypes()); final WindowOperator operator = createWindowOperator( planner.getTableConfig(), aggCodeGenerator, equaliser, accTypes, windowPropertyTypes, aggValueTypes, inputRowType.getChildren().toArray(new LogicalType[0]), inputTimeFieldIndex, shiftTimeZone); final OneInputTransformation transform = new OneInputTransformation<>( inputTransform, getDescription(), operator, InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism()); // set KeyType and Selector for state final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType)); transform.setStateKeySelector(selector); transform.setStateKeyType(selector.getProducedType()); return transform; } private LogicalType[] extractLogicalTypes(DataType[] dataTypes) { return Arrays.stream(dataTypes) .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType) .toArray(LogicalType[]::new); } private GeneratedClass createAggsHandler( AggregateInfoList aggInfoList, TableConfig config, RelBuilder relBuilder, List fieldTypes, ZoneId shiftTimeZone) { final boolean needMerge; final Class windowClass; if (window instanceof SlidingGroupWindow) { ValueLiteralExpression size = ((SlidingGroupWindow) window).size(); needMerge = hasTimeIntervalType(size); windowClass = hasRowIntervalType(size) ? CountWindow.class : TimeWindow.class; } else if (window instanceof TumblingGroupWindow) { needMerge = false; ValueLiteralExpression size = ((TumblingGroupWindow) window).size(); windowClass = hasRowIntervalType(size) ? CountWindow.class : TimeWindow.class; } else if (window instanceof SessionGroupWindow) { needMerge = true; windowClass = TimeWindow.class; } else { throw new TableException("Unsupported window: " + window.toString()); } final AggsHandlerCodeGenerator generator = new AggsHandlerCodeGenerator( new CodeGeneratorContext(config), relBuilder, JavaScalaConversionUtil.toScala(fieldTypes), false) // copyInputField .needAccumulate(); if (needMerge) { generator.needMerge(0, false, null); } if (needRetraction) { generator.needRetract(); } final List windowProperties = Arrays.asList( Arrays.stream(namedWindowProperties) .map(PlannerNamedWindowProperty::getProperty) .toArray(PlannerWindowProperty[]::new)); final boolean isTableAggregate = isTableAggregate(Arrays.asList(aggInfoList.getActualAggregateCalls())); if (isTableAggregate) { return generator.generateNamespaceTableAggsHandler( "GroupingWindowTableAggsHandler", aggInfoList, JavaScalaConversionUtil.toScala(windowProperties), windowClass, shiftTimeZone); } else { return generator.generateNamespaceAggsHandler( "GroupingWindowAggsHandler", aggInfoList, JavaScalaConversionUtil.toScala(windowProperties), windowClass, shiftTimeZone); } } private WindowOperator createWindowOperator( TableConfig tableConfig, GeneratedClass aggsHandler, GeneratedRecordEqualiser recordEqualiser, LogicalType[] accTypes, LogicalType[] windowPropertyTypes, LogicalType[] aggValueTypes, LogicalType[] inputFields, int timeFieldIndex, ZoneId shiftTimeZone) { WindowOperatorBuilder builder = WindowOperatorBuilder.builder() .withInputFields(inputFields) .withShiftTimezone(shiftTimeZone); if (window instanceof TumblingGroupWindow) { TumblingGroupWindow tumblingWindow = (TumblingGroupWindow) window; FieldReferenceExpression timeField = tumblingWindow.timeField(); ValueLiteralExpression size = tumblingWindow.size(); if (isProctimeAttribute(timeField) && hasTimeIntervalType(size)) { builder = builder.tumble(toDuration(size)).withProcessingTime(); } else if (isRowtimeAttribute(timeField) && hasTimeIntervalType(size)) { builder = builder.tumble(toDuration(size)).withEventTime(timeFieldIndex); } else if (isProctimeAttribute(timeField) && hasRowIntervalType(size)) { builder = builder.countWindow(toLong(size)); } else { // TODO: EventTimeTumblingGroupWindow should sort the stream on event time // before applying the windowing logic. Otherwise, this would be the same as a // ProcessingTimeTumblingGroupWindow throw new UnsupportedOperationException( "Event-time grouping windows on row intervals are currently not supported."); } } else if (window instanceof SlidingGroupWindow) { SlidingGroupWindow slidingWindow = (SlidingGroupWindow) window; FieldReferenceExpression timeField = slidingWindow.timeField(); ValueLiteralExpression size = slidingWindow.size(); ValueLiteralExpression slide = slidingWindow.slide(); if (isProctimeAttribute(timeField) && hasTimeIntervalType(size)) { builder = builder.sliding(toDuration(size), toDuration(slide)).withProcessingTime(); } else if (isRowtimeAttribute(timeField) && hasTimeIntervalType(size)) { builder = builder.sliding(toDuration(size), toDuration(slide)) .withEventTime(timeFieldIndex); } else if (isProctimeAttribute(timeField) && hasRowIntervalType(size)) { builder = builder.countWindow(toLong(size), toLong(slide)); } else { // TODO: EventTimeTumblingGroupWindow should sort the stream on event time // before applying the windowing logic. Otherwise, this would be the same as a // ProcessingTimeTumblingGroupWindow throw new UnsupportedOperationException( "Event-time grouping windows on row intervals are currently not supported."); } } else if (window instanceof SessionGroupWindow) { SessionGroupWindow sessionWindow = (SessionGroupWindow) window; FieldReferenceExpression timeField = sessionWindow.timeField(); ValueLiteralExpression gap = sessionWindow.gap(); if (isProctimeAttribute(timeField)) { builder = builder.session(toDuration(gap)).withProcessingTime(); } else if (isRowtimeAttribute(timeField)) { builder = builder.session(toDuration(gap)).withEventTime(timeFieldIndex); } else { throw new UnsupportedOperationException("This should not happen."); } } else { throw new TableException("Unsupported window: " + window.toString()); } WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(tableConfig, window); if (emitStrategy.produceUpdates()) { // mark this operator will send retraction and set new trigger builder.produceUpdates() .triggering(emitStrategy.getTrigger()) .withAllowedLateness(Duration.ofMillis(emitStrategy.getAllowLateness())); } if (aggsHandler instanceof GeneratedNamespaceAggsHandleFunction) { return builder.aggregate( (GeneratedNamespaceAggsHandleFunction) aggsHandler, recordEqualiser, accTypes, aggValueTypes, windowPropertyTypes) .build(); } else if (aggsHandler instanceof GeneratedNamespaceTableAggsHandleFunction) { return builder.aggregate( (GeneratedNamespaceTableAggsHandleFunction) aggsHandler, accTypes, aggValueTypes, windowPropertyTypes) .build(); } else { throw new TableException( "Unsupported agg handler class: " + aggsHandler.getClass().getSimpleName()); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy