All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.source.StreamReadOperator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.source;

import org.apache.hudi.metrics.FlinkStreamReadMetrics;
import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;

import org.apache.flink.api.common.operators.MailboxExecutor;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.runtime.state.JavaSerializer;
import org.apache.flink.runtime.state.StateInitializationContext;
import org.apache.flink.runtime.state.StateSnapshotContext;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory;
import org.apache.flink.streaming.api.operators.Output;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.operators.StreamOperatorParameters;
import org.apache.flink.streaming.api.operators.StreamSourceContexts;
import org.apache.flink.streaming.api.operators.YieldingOperatorFactory;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingDeque;

/**
 * The operator that reads the {@link MergeOnReadInputSplit splits} received from the preceding {@link
 * StreamReadMonitoringFunction}. Contrary to the {@link StreamReadMonitoringFunction} which has a parallelism of 1,
 * this operator can have multiple parallelism.
 *
 * 

As soon as an input split {@link MergeOnReadInputSplit} is received, it is put into a queue, * the {@code MailboxExecutor} read the actual data of the split. * This architecture allows the separation of split reading from processing the checkpoint barriers, * thus removing any potential back-pressure. */ public class StreamReadOperator extends AbstractStreamOperator implements OneInputStreamOperator { private static final Logger LOG = LoggerFactory.getLogger(StreamReadOperator.class); private static final int MINI_BATCH_SIZE = 2048; // It's the same thread that runs this operator and checkpoint actions. Use this executor to schedule only // splits for subsequent reading, so that a new checkpoint could be triggered without blocking a long time // for exhausting all scheduled split reading tasks. private final MailboxExecutor executor; private MergeOnReadInputFormat format; private transient SourceFunction.SourceContext sourceContext; private transient ListState inputSplitsState; private transient Queue splits; // Splits are read by the same thread that calls #processElement. Each read task is submitted to that thread by adding // them to the executor. This state is used to ensure that only one read task is in that splits queue at a time, so that // read tasks do not accumulate ahead of checkpoint tasks. When there is a read task in the queue, this is set to RUNNING. // When there are no more files to read, this will be set to IDLE. private transient volatile SplitState currentSplitState; private transient FlinkStreamReadMetrics readMetrics; private StreamReadOperator(MergeOnReadInputFormat format, ProcessingTimeService timeService, MailboxExecutor mailboxExecutor) { this.format = Preconditions.checkNotNull(format, "The InputFormat should not be null."); this.processingTimeService = timeService; this.executor = Preconditions.checkNotNull(mailboxExecutor, "The mailboxExecutor should not be null."); } @Override public void initializeState(StateInitializationContext context) throws Exception { super.initializeState(context); registerMetrics(); // TODO Replace Java serialization with Avro approach to keep state compatibility. inputSplitsState = context.getOperatorStateStore().getListState( new ListStateDescriptor<>("splits", new JavaSerializer<>())); // Initialize the current split state to IDLE. currentSplitState = SplitState.IDLE; // Recover splits state from flink state backend if possible. splits = new LinkedBlockingDeque<>(); if (context.isRestored()) { int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); LOG.info("Restoring state for operator {} (task ID: {}).", getClass().getSimpleName(), subtaskIdx); for (MergeOnReadInputSplit split : inputSplitsState.get()) { splits.add(split); } } this.sourceContext = getSourceContext( getOperatorConfig().getTimeCharacteristic(), getProcessingTimeService(), output, getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval()); // Enqueue to process the recovered input splits. enqueueProcessSplits(); } @Override public void snapshotState(StateSnapshotContext context) throws Exception { super.snapshotState(context); inputSplitsState.clear(); inputSplitsState.addAll(new ArrayList<>(splits)); } @Override public void processElement(StreamRecord element) { splits.add(element.getValue()); enqueueProcessSplits(); } private void enqueueProcessSplits() { if (currentSplitState == SplitState.IDLE && !splits.isEmpty()) { currentSplitState = SplitState.RUNNING; executor.execute(this::processSplits, "process input split"); } } private void processSplits() throws IOException { MergeOnReadInputSplit split = splits.peek(); if (split == null) { currentSplitState = SplitState.IDLE; return; } // 1. open a fresh new input split and start reading as mini-batch // 2. if the input split has remaining records to read, switches to another runnable to handle // 3. if the input split reads to the end, close the format and remove the split from the queue #splits // 4. for each runnable, reads at most #MINI_BATCH_SIZE number of records if (format.isClosed()) { // This log is important to indicate the consuming process, // there is only one log message for one data bucket. LOG.info("Processing input split : {}", split); format.open(split); readMetrics.setSplitLatestCommit(split.getLatestCommit()); } try { consumeAsMiniBatch(split); } finally { currentSplitState = SplitState.IDLE; } // Re-schedule to process the next split. enqueueProcessSplits(); } /** * Consumes at most {@link #MINI_BATCH_SIZE} number of records * for the given input split {@code split}. * *

Note: close the input format and remove the input split for the queue {@link #splits} * if the split reads to the end. * * @param split The input split */ private void consumeAsMiniBatch(MergeOnReadInputSplit split) throws IOException { for (int i = 0; i < MINI_BATCH_SIZE; i++) { if (!format.reachedEnd()) { sourceContext.collect(format.nextRecord(null)); split.consume(); } else { // close the input format format.close(); // remove the split splits.poll(); break; } } } @Override public void processWatermark(Watermark mark) { // we do nothing because we emit our own watermarks if needed. } @Override public void close() throws Exception { super.close(); if (format != null) { format.close(); format.closeInputFormat(); format = null; } sourceContext = null; } @Override public void finish() throws Exception { super.finish(); output.close(); if (sourceContext != null) { sourceContext.emitWatermark(Watermark.MAX_WATERMARK); sourceContext.close(); sourceContext = null; } } private void registerMetrics() { MetricGroup metrics = getRuntimeContext().getMetricGroup(); readMetrics = new FlinkStreamReadMetrics(metrics); readMetrics.registerMetrics(); } public static OneInputStreamOperatorFactory factory(MergeOnReadInputFormat format) { return new OperatorFactory(format); } private enum SplitState { IDLE, RUNNING } private static class OperatorFactory extends AbstractStreamOperatorFactory implements OneInputStreamOperatorFactory, YieldingOperatorFactory { private final MergeOnReadInputFormat format; private OperatorFactory(MergeOnReadInputFormat format) { this.format = format; } @SuppressWarnings("unchecked") @Override public > O createStreamOperator(StreamOperatorParameters parameters) { StreamReadOperator operator = new StreamReadOperator(format, processingTimeService, getMailboxExecutor()); operator.setup(parameters.getContainingTask(), parameters.getStreamConfig(), parameters.getOutput()); return (O) operator; } @Override public Class getStreamOperatorClass(ClassLoader classLoader) { return StreamReadOperator.class; } } private static SourceFunction.SourceContext getSourceContext( TimeCharacteristic timeCharacteristic, ProcessingTimeService processingTimeService, Output> output, long watermarkInterval) { return StreamSourceContexts.getSourceContext( timeCharacteristic, processingTimeService, new Object(), // no actual locking needed output, watermarkInterval, -1, true); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy