/*
* Copyright 2023 Ververica Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ververica.cdc.connectors.base.source.reader;
import org.apache.flink.api.connector.source.SourceEvent;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.base.source.reader.RecordEmitter;
import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
import org.apache.flink.connector.base.source.reader.SingleThreadMultiplexSourceReaderBase;
import org.apache.flink.connector.base.source.reader.fetcher.SingleThreadFetcherManager;
import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import com.ververica.cdc.common.annotation.Experimental;
import com.ververica.cdc.connectors.base.config.SourceConfig;
import com.ververica.cdc.connectors.base.dialect.DataSourceDialect;
import com.ververica.cdc.connectors.base.source.meta.events.FinishedSnapshotSplitsAckEvent;
import com.ververica.cdc.connectors.base.source.meta.events.FinishedSnapshotSplitsReportEvent;
import com.ververica.cdc.connectors.base.source.meta.events.FinishedSnapshotSplitsRequestEvent;
import com.ververica.cdc.connectors.base.source.meta.events.StreamSplitMetaEvent;
import com.ververica.cdc.connectors.base.source.meta.events.StreamSplitMetaRequestEvent;
import com.ververica.cdc.connectors.base.source.meta.offset.Offset;
import com.ververica.cdc.connectors.base.source.meta.split.FinishedSnapshotSplitInfo;
import com.ververica.cdc.connectors.base.source.meta.split.SnapshotSplit;
import com.ververica.cdc.connectors.base.source.meta.split.SnapshotSplitState;
import com.ververica.cdc.connectors.base.source.meta.split.SourceRecords;
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitBase;
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitSerializer;
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitState;
import com.ververica.cdc.connectors.base.source.meta.split.StreamSplit;
import com.ververica.cdc.connectors.base.source.meta.split.StreamSplitState;
import io.debezium.relational.TableId;
import io.debezium.relational.history.TableChanges;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
 * The source reader that reads {@link SnapshotSplit}s with multiple parallelism during the table
 * snapshot phase, and then reads the {@link StreamSplit} with single parallelism during the
 * table stream phase.
 */
@Experimental
public class IncrementalSourceReader<T, C extends SourceConfig>
        extends SingleThreadMultiplexSourceReaderBase<
                SourceRecords, T, SourceSplitBase, SourceSplitState> {
private static final Logger LOG = LoggerFactory.getLogger(IncrementalSourceReader.class);
    private final Map<String, SnapshotSplit> finishedUnackedSplits;
    private final Map<String, StreamSplit> uncompletedStreamSplits;
    private final int subtaskId;
    private final SourceSplitSerializer sourceSplitSerializer;
    private final C sourceConfig;
    private final DataSourceDialect<C> dialect;
    public IncrementalSourceReader(
            FutureCompletingBlockingQueue<RecordsWithSplitIds<SourceRecords>> elementQueue,
            Supplier<IncrementalSourceSplitReader<C>> splitReaderSupplier,
            RecordEmitter<SourceRecords, T, SourceSplitState> recordEmitter,
            Configuration config,
            SourceReaderContext context,
            C sourceConfig,
            SourceSplitSerializer sourceSplitSerializer,
            DataSourceDialect<C> dialect) {
super(
elementQueue,
new SingleThreadFetcherManager<>(elementQueue, splitReaderSupplier::get),
recordEmitter,
config,
context);
this.sourceConfig = sourceConfig;
this.finishedUnackedSplits = new HashMap<>();
this.uncompletedStreamSplits = new HashMap<>();
this.subtaskId = context.getIndexOfSubtask();
this.sourceSplitSerializer = checkNotNull(sourceSplitSerializer);
this.dialect = dialect;
}
@Override
public void start() {
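        // On a fresh start no splits have been assigned yet, so ask the enumerator for the
        // first split; after a restore, splits come back through addSplits() instead.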
if (getNumberOfCurrentlyAssignedSplits() == 0) {
context.sendSplitRequest();
}
}
@Override
protected SourceSplitState initializedState(SourceSplitBase split) {
if (split.isSnapshotSplit()) {
return new SnapshotSplitState(split.asSnapshotSplit());
} else {
return new StreamSplitState(split.asStreamSplit());
}
}
@Override
    public List<SourceSplitBase> snapshotState(long checkpointId) {
        // unfinished splits
        List<SourceSplitBase> stateSplits = super.snapshotState(checkpointId);
        // add finished snapshot splits that have not received an ack yet
        stateSplits.addAll(finishedUnackedSplits.values());
        // add stream splits that are still uncompleted
        stateSplits.addAll(uncompletedStreamSplits.values());
return stateSplits;
}
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
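        // Forward the notification to the dialect so it can react to the completed
        // checkpoint; what it does with it is dialect-specific.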
dialect.notifyCheckpointComplete(checkpointId);
}
@Override
    protected void onSplitFinished(Map<String, SourceSplitState> finishedSplitIds) {
for (SourceSplitState splitState : finishedSplitIds.values()) {
SourceSplitBase sourceSplit = splitState.toSourceSplit();
            checkState(
                    sourceSplit.isSnapshotSplit(),
                    String.format(
                            "Only snapshot splits can finish, but the actual split is a stream split %s",
                            sourceSplit));
finishedUnackedSplits.put(sourceSplit.splitId(), sourceSplit.asSnapshotSplit());
}
reportFinishedSnapshotSplitsIfNeed();
context.sendSplitRequest();
}
@Override
    public void addSplits(List<SourceSplitBase> splits) {
        // restore the finishedUnackedSplits from the restored splits
        List<SourceSplitBase> unfinishedSplits = new ArrayList<>();
for (SourceSplitBase split : splits) {
if (split.isSnapshotSplit()) {
SnapshotSplit snapshotSplit = split.asSnapshotSplit();
if (snapshotSplit.isSnapshotReadFinished()) {
finishedUnackedSplits.put(snapshotSplit.splitId(), snapshotSplit);
} else {
unfinishedSplits.add(split);
}
} else {
// the stream split is uncompleted
if (!split.asStreamSplit().isCompletedSplit()) {
uncompletedStreamSplits.put(split.splitId(), split.asStreamSplit());
requestStreamSplitMetaIfNeeded(split.asStreamSplit());
} else {
uncompletedStreamSplits.remove(split.splitId());
StreamSplit streamSplit =
discoverTableSchemasForStreamSplit(split.asStreamSplit());
unfinishedSplits.add(streamSplit);
}
}
}
// notify split enumerator again about the finished unacked snapshot splits
reportFinishedSnapshotSplitsIfNeed();
        // add all unfinished splits (including the stream split) to SourceReaderBase
super.addSplits(unfinishedSplits);
}
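    // If a stream split carries no table schemas yet (e.g. when it was newly created by the
    // enumerator), discover the schemas from the external system before reading; otherwise
    // reuse the schemas the split already carries.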
private StreamSplit discoverTableSchemasForStreamSplit(StreamSplit split) {
final String splitId = split.splitId();
if (split.getTableSchemas().isEmpty()) {
try {
                Map<TableId, TableChanges.TableChange> tableSchemas =
                        dialect.discoverDataCollectionSchemas(sourceConfig);
                LOG.info("The table schema discovery for stream split {} succeeded", splitId);
return StreamSplit.fillTableSchemas(split, tableSchemas);
} catch (Exception e) {
                LOG.error("Failed to obtain table schemas due to {}", e.getMessage());
throw new FlinkRuntimeException(e);
}
} else {
            LOG.warn(
                    "The stream split {} already has table schemas, skipping table schema discovery",
                    split);
return split;
}
}
@Override
public void handleSourceEvents(SourceEvent sourceEvent) {
if (sourceEvent instanceof FinishedSnapshotSplitsAckEvent) {
FinishedSnapshotSplitsAckEvent ackEvent = (FinishedSnapshotSplitsAckEvent) sourceEvent;
LOG.debug(
"The subtask {} receives ack event for {} from enumerator.",
subtaskId,
ackEvent.getFinishedSplits());
for (String splitId : ackEvent.getFinishedSplits()) {
this.finishedUnackedSplits.remove(splitId);
}
} else if (sourceEvent instanceof FinishedSnapshotSplitsRequestEvent) {
// report finished snapshot splits
LOG.debug(
"The subtask {} receives request to report finished snapshot splits.",
subtaskId);
reportFinishedSnapshotSplitsIfNeed();
} else if (sourceEvent instanceof StreamSplitMetaEvent) {
LOG.debug(
"The subtask {} receives stream meta with group id {}.",
subtaskId,
((StreamSplitMetaEvent) sourceEvent).getMetaGroupId());
fillMetaDataForStreamSplit((StreamSplitMetaEvent) sourceEvent);
} else {
super.handleSourceEvents(sourceEvent);
}
}
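    // The enumerator ships the finished snapshot split infos of a stream split in fixed-size
    // meta groups. Each group received here is appended to the split in order; out-of-order
    // or unknown groups are ignored, and requestStreamSplitMetaIfNeeded(...) keeps asking
    // for the next group until the split is complete.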
private void fillMetaDataForStreamSplit(StreamSplitMetaEvent metadataEvent) {
StreamSplit streamSplit = uncompletedStreamSplits.get(metadataEvent.getSplitId());
if (streamSplit != null) {
final int receivedMetaGroupId = metadataEvent.getMetaGroupId();
final int expectedMetaGroupId =
getNextMetaGroupId(
streamSplit.getFinishedSnapshotSplitInfos().size(),
sourceConfig.getSplitMetaGroupSize());
if (receivedMetaGroupId == expectedMetaGroupId) {
                List<FinishedSnapshotSplitInfo> metaDataGroup =
metadataEvent.getMetaGroup().stream()
.map(sourceSplitSerializer::deserialize)
.collect(Collectors.toList());
uncompletedStreamSplits.put(
streamSplit.splitId(),
StreamSplit.appendFinishedSplitInfos(streamSplit, metaDataGroup));
LOG.info("Fill metadata of group {} to stream split", metaDataGroup.size());
} else {
                LOG.warn(
                        "Received an out-of-order metadata event for split {}: the received meta group id is {}, but the expected id is {}; ignoring it",
                        metadataEvent.getSplitId(),
                        receivedMetaGroupId,
                        expectedMetaGroupId);
}
requestStreamSplitMetaIfNeeded(streamSplit);
} else {
LOG.warn(
"Received metadata event for split {}, but the uncompleted split map does not contain it",
metadataEvent.getSplitId());
}
}
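    // A stream split is completed once it carries all finished snapshot split infos; until
    // then, keep requesting the next meta group from the coordinator. Once completed, the
    // split is handed back to addSplits() so reading can actually start.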
private void requestStreamSplitMetaIfNeeded(StreamSplit streamSplit) {
final String splitId = streamSplit.splitId();
if (!streamSplit.isCompletedSplit()) {
final int nextMetaGroupId =
getNextMetaGroupId(
streamSplit.getFinishedSnapshotSplitInfos().size(),
sourceConfig.getSplitMetaGroupSize());
StreamSplitMetaRequestEvent splitMetaRequestEvent =
new StreamSplitMetaRequestEvent(splitId, nextMetaGroupId);
context.sendSourceEventToCoordinator(splitMetaRequestEvent);
} else {
LOG.info("The meta of stream split {} has been collected success", splitId);
this.addSplits(Collections.singletonList(streamSplit));
}
}
private void reportFinishedSnapshotSplitsIfNeed() {
if (!finishedUnackedSplits.isEmpty()) {
            final Map<String, Offset> finishedOffsets = new HashMap<>();
for (SnapshotSplit split : finishedUnackedSplits.values()) {
finishedOffsets.put(split.splitId(), split.getHighWatermark());
}
FinishedSnapshotSplitsReportEvent reportEvent =
new FinishedSnapshotSplitsReportEvent(finishedOffsets);
context.sendSourceEventToCoordinator(reportEvent);
LOG.debug(
"The subtask {} reports offsets of finished snapshot splits {}.",
subtaskId,
finishedOffsets);
}
}
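    // A worked example of the meta group arithmetic below (numbers are illustrative): with
    // metaGroupSize = 1000, a split holding 0 received infos asks for group 0; after a full
    // group of 1000 infos it asks for group 1 (1000 % 1000 == 0 -> 1000 / 1000); after
    // 1500 infos it asks for group 2 (1500 / 1000 + 1).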
    /** Returns the next meta group id according to the received meta number and the meta group size. */
public static int getNextMetaGroupId(int receivedMetaNum, int metaGroupSize) {
Preconditions.checkState(metaGroupSize > 0);
return receivedMetaNum % metaGroupSize == 0
? (receivedMetaNum / metaGroupSize)
: (receivedMetaNum / metaGroupSize) + 1;
}
@Override
protected SourceSplitBase toSplitType(String splitId, SourceSplitState splitState) {
return splitState.toSourceSplit();
}
}
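
// A construction sketch (hypothetical wiring; the split reader, record emitter, and
// readerContext below are assumptions, not part of this file). A concrete incremental
// source would typically build the reader in its createReader() method roughly like:
//
//   FutureCompletingBlockingQueue<RecordsWithSplitIds<SourceRecords>> elementsQueue =
//           new FutureCompletingBlockingQueue<>();
//   IncrementalSourceReader<T, C> reader =
//           new IncrementalSourceReader<>(
//                   elementsQueue,
//                   () -> new IncrementalSourceSplitReader<>(
//                           readerContext.getIndexOfSubtask(), dialect, sourceConfig),
//                   recordEmitter, // e.g. an IncrementalSourceRecordEmitter
//                   readerContext.getConfiguration(),
//                   readerContext,
//                   sourceConfig,
//                   sourceSplitSerializer,
//                   dialect);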