All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.streaming.api.operators.source.SourceOutputWithWatermarks Maven / Gradle / Ivy

There is a newer version: 1.19.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.operators.source;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.eventtime.TimestampAssigner;
import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.common.eventtime.WatermarkGenerator;
import org.apache.flink.api.common.eventtime.WatermarkOutput;
import org.apache.flink.api.connector.source.SourceOutput;
import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.tasks.ExceptionInChainedOperatorException;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Implementation of the SourceOutput. The records emitted to this output are pushed into a given
 * {@link PushingAsyncDataInput.DataOutput}. The watermarks are pushed into the same output, or into
 * a separate {@link WatermarkOutput}, if one is provided.
 *
 * 

Periodic Watermarks

* *

This output does not implement automatic periodic watermark emission. The method {@link * SourceOutputWithWatermarks#emitPeriodicWatermark()} needs to be called periodically. * *

Note on Performance Considerations

* *

The methods {@link SourceOutput#collect(Object)} and {@link SourceOutput#collect(Object, * long)} are highly performance-critical (part of the hot loop). To make the code as JIT friendly * as possible, we want to have only a single implementation of these two methods, across all * classes. That way, the JIT compiler can de-virtualize (and inline) them better. * *

Currently, we have one implementation of these methods for the case where we don't need * watermarks (see class {@link NoOpTimestampsAndWatermarks}) and one for the case where we do (this * class). When the JVM is dedicated to a single job (or type of job) only one of these classes will * be loaded. In mixed job setups, we still have a bimorphic method (rather than a * poly/-/mega-morphic method). * * @param The type of emitted records. */ @Internal public class SourceOutputWithWatermarks implements SourceOutput { private final PushingAsyncDataInput.DataOutput recordsOutput; private final TimestampAssigner timestampAssigner; private final WatermarkGenerator watermarkGenerator; private final WatermarkOutput onEventWatermarkOutput; private final WatermarkOutput periodicWatermarkOutput; private final StreamRecord reusingRecord; /** * Creates a new SourceOutputWithWatermarks that emits records to the given DataOutput and * watermarks to the (possibly different) WatermarkOutput. */ protected SourceOutputWithWatermarks( PushingAsyncDataInput.DataOutput recordsOutput, WatermarkOutput onEventWatermarkOutput, WatermarkOutput periodicWatermarkOutput, TimestampAssigner timestampAssigner, WatermarkGenerator watermarkGenerator) { this.recordsOutput = checkNotNull(recordsOutput); this.onEventWatermarkOutput = checkNotNull(onEventWatermarkOutput); this.periodicWatermarkOutput = checkNotNull(periodicWatermarkOutput); this.timestampAssigner = checkNotNull(timestampAssigner); this.watermarkGenerator = checkNotNull(watermarkGenerator); this.reusingRecord = new StreamRecord<>(null); } // ------------------------------------------------------------------------ // SourceOutput Methods // // Note that the two methods below are final, as a partial enforcement // of the performance design goal mentioned in the class-level comment. // ------------------------------------------------------------------------ @Override public final void collect(T record) { collect(record, TimestampAssigner.NO_TIMESTAMP); } @Override public final void collect(T record, long timestamp) { try { final long assignedTimestamp = timestampAssigner.extractTimestamp(record, timestamp); // IMPORTANT: The event must be emitted before the watermark generator is called. recordsOutput.emitRecord(reusingRecord.replace(record, assignedTimestamp)); watermarkGenerator.onEvent(record, assignedTimestamp, onEventWatermarkOutput); } catch (ExceptionInChainedOperatorException e) { throw e; } catch (Exception e) { throw new ExceptionInChainedOperatorException(e); } } // ------------------------------------------------------------------------ // WatermarkOutput Methods // // These two methods are final as well, to enforce the contract that the // watermarks from emitWatermark(Watermark) go to the same output as the // watermarks from the watermarkGenerator.onEvent(...) calls in the collect(...) // methods. // ------------------------------------------------------------------------ @Override public final void emitWatermark(Watermark watermark) { onEventWatermarkOutput.emitWatermark(watermark); } @Override public final void markIdle() { onEventWatermarkOutput.markIdle(); } @Override public void markActive() { onEventWatermarkOutput.markActive(); } public final void emitPeriodicWatermark() { watermarkGenerator.onPeriodicEmit(periodicWatermarkOutput); } // ------------------------------------------------------------------------ // Factories // ------------------------------------------------------------------------ /** * Creates a new SourceOutputWithWatermarks that emits records to the given DataOutput and * watermarks to the different WatermarkOutputs. */ public static SourceOutputWithWatermarks createWithSeparateOutputs( PushingAsyncDataInput.DataOutput recordsOutput, WatermarkOutput onEventWatermarkOutput, WatermarkOutput periodicWatermarkOutput, TimestampAssigner timestampAssigner, WatermarkGenerator watermarkGenerator) { return new SourceOutputWithWatermarks<>( recordsOutput, onEventWatermarkOutput, periodicWatermarkOutput, timestampAssigner, watermarkGenerator); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy