
// com.ververica.cdc.connectors.base.source.assigner.HybridSplitAssigner (Maven / Gradle / Ivy)
/*
* Copyright 2023 Ververica Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ververica.cdc.connectors.base.source.assigner;
import com.ververica.cdc.connectors.base.config.SourceConfig;
import com.ververica.cdc.connectors.base.dialect.DataSourceDialect;
import com.ververica.cdc.connectors.base.source.assigner.state.HybridPendingSplitsState;
import com.ververica.cdc.connectors.base.source.assigner.state.PendingSplitsState;
import com.ververica.cdc.connectors.base.source.meta.offset.Offset;
import com.ververica.cdc.connectors.base.source.meta.offset.OffsetFactory;
import com.ververica.cdc.connectors.base.source.meta.split.FinishedSnapshotSplitInfo;
import com.ververica.cdc.connectors.base.source.meta.split.SchemalessSnapshotSplit;
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitBase;
import com.ververica.cdc.connectors.base.source.meta.split.StreamSplit;
import io.debezium.relational.TableId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
 * Assigner for hybrid splits, i.e. snapshot splits followed by a single stream split.
 *
 * <p>Snapshot splits are handed out by a delegate {@link SnapshotSplitAssigner}. Once that
 * delegate has no more splits and reports itself as finished, this assigner creates and hands
 * out one {@link StreamSplit}.
 *
 * @param <C> the concrete {@link SourceConfig} type used to configure the source
 */
public class HybridSplitAssigner<C extends SourceConfig> implements SplitAssigner {

    private static final Logger LOG = LoggerFactory.getLogger(HybridSplitAssigner.class);

    /** Fixed split id used for the stream split created by this assigner. */
    private static final String STREAM_SPLIT_ID = "stream-split";

    /**
     * Threshold on the number of finished snapshot split infos; above it the infos are not
     * embedded into the stream split (see {@link #createStreamSplit()}).
     */
    private final int splitMetaGroupSize;

    /** Whether the stream split has already been handed out; part of the checkpointed state. */
    private boolean isStreamSplitAssigned;

    /** Delegate responsible for all snapshot-split bookkeeping. */
    private final SnapshotSplitAssigner<C> snapshotSplitAssigner;

    /** Factory for the initial and the no-stopping offsets of the stream split. */
    private final OffsetFactory offsetFactory;

    /**
     * Creates an assigner backed by a new {@link SnapshotSplitAssigner} built from the given
     * tables, with the stream split marked as not yet assigned.
     *
     * @param sourceConfig source configuration; also supplies the split meta group size
     * @param currentParallelism parallelism forwarded to the snapshot assigner
     * @param remainingTables tables forwarded to the snapshot assigner
     * @param isTableIdCaseSensitive case-sensitivity flag forwarded to the snapshot assigner
     * @param dialect data source dialect forwarded to the snapshot assigner
     * @param offsetFactory factory used for snapshot and stream split offsets
     */
    public HybridSplitAssigner(
            C sourceConfig,
            int currentParallelism,
            List<TableId> remainingTables,
            boolean isTableIdCaseSensitive,
            DataSourceDialect<C> dialect,
            OffsetFactory offsetFactory) {
        this(
                new SnapshotSplitAssigner<>(
                        sourceConfig,
                        currentParallelism,
                        remainingTables,
                        isTableIdCaseSensitive,
                        dialect,
                        offsetFactory),
                false,
                sourceConfig.getSplitMetaGroupSize(),
                offsetFactory);
    }

    /**
     * Creates an assigner from checkpointed state: the snapshot assigner is rebuilt from the
     * checkpoint's pending snapshot splits and the stream-split flag is taken from the
     * checkpoint.
     *
     * @param sourceConfig source configuration; also supplies the split meta group size
     * @param currentParallelism parallelism forwarded to the snapshot assigner
     * @param checkpoint previously snapshotted state of a hybrid assigner
     * @param dialect data source dialect forwarded to the snapshot assigner
     * @param offsetFactory factory used for snapshot and stream split offsets
     */
    public HybridSplitAssigner(
            C sourceConfig,
            int currentParallelism,
            HybridPendingSplitsState checkpoint,
            DataSourceDialect<C> dialect,
            OffsetFactory offsetFactory) {
        this(
                new SnapshotSplitAssigner<>(
                        sourceConfig,
                        currentParallelism,
                        checkpoint.getSnapshotPendingSplits(),
                        dialect,
                        offsetFactory),
                checkpoint.isStreamSplitAssigned(),
                sourceConfig.getSplitMetaGroupSize(),
                offsetFactory);
    }

    /** Common constructor; simply stores the collaborators and the initial flag value. */
    private HybridSplitAssigner(
            SnapshotSplitAssigner<C> snapshotSplitAssigner,
            boolean isStreamSplitAssigned,
            int splitMetaGroupSize,
            OffsetFactory offsetFactory) {
        this.snapshotSplitAssigner = snapshotSplitAssigner;
        this.isStreamSplitAssigned = isStreamSplitAssigned;
        this.splitMetaGroupSize = splitMetaGroupSize;
        this.offsetFactory = offsetFactory;
    }

    /** Opens the underlying snapshot split assigner. */
    @Override
    public void open() {
        snapshotSplitAssigner.open();
    }

    /**
     * Returns the next split to assign.
     *
     * <p>While the snapshot assigner still has splits, they are returned first. Afterwards the
     * stream split is created and returned exactly once, but only when the snapshot assigner
     * reports itself as finished; until then, and after the stream split has been handed out,
     * the result is empty.
     *
     * @return the next snapshot or stream split, or empty if nothing can be assigned right now
     */
    @Override
    public Optional<SourceSplitBase> getNext() {
        if (snapshotSplitAssigner.noMoreSplits()) {
            // stream split assigning
            if (isStreamSplitAssigned) {
                // no more splits for the assigner
                LOG.trace(
                        "No more splits for the SnapshotSplitAssigner. StreamSplit is already assigned.");
                return Optional.empty();
            } else if (snapshotSplitAssigner.isFinished()) {
                // we need to wait snapshot-assigner to be finished before
                // assigning the stream split. Otherwise, records emitted from stream split
                // might be out-of-order in terms of same primary key with snapshot splits.
                isStreamSplitAssigned = true;
                StreamSplit streamSplit = createStreamSplit();
                LOG.trace(
                        "SnapshotSplitAssigner is finished: creating a new stream split {}",
                        streamSplit);
                return Optional.of(streamSplit);
            } else {
                // stream split is not ready by now
                LOG.trace(
                        "Waiting for SnapshotSplitAssigner to be finished before assigning a new stream split.");
                return Optional.empty();
            }
        } else {
            // snapshot assigner still have remaining splits, assign split from it
            return snapshotSplitAssigner.getNext();
        }
    }

    /** Delegates to the snapshot split assigner. */
    @Override
    public boolean waitingForFinishedSplits() {
        return snapshotSplitAssigner.waitingForFinishedSplits();
    }

    /** Returns the finished split infos tracked by the snapshot split assigner. */
    @Override
    public List<FinishedSnapshotSplitInfo> getFinishedSplitInfos() {
        return snapshotSplitAssigner.getFinishedSplitInfos();
    }

    /**
     * Forwards the reported finished offsets to the snapshot split assigner.
     *
     * @param splitFinishedOffsets finished offsets keyed by split id
     */
    @Override
    public void onFinishedSplits(Map<String, Offset> splitFinishedOffsets) {
        snapshotSplitAssigner.onFinishedSplits(splitFinishedOffsets);
    }

    /**
     * Takes back splits that were previously handed out.
     *
     * <p>Snapshot splits are returned to the snapshot split assigner. A stream split is not
     * stored; instead the assigned flag is cleared so that {@link #getNext()} re-creates it.
     *
     * @param splits the splits to add back
     */
    @Override
    public void addSplits(Collection<SourceSplitBase> splits) {
        List<SourceSplitBase> snapshotSplits = new ArrayList<>();
        for (SourceSplitBase split : splits) {
            if (split.isSnapshotSplit()) {
                snapshotSplits.add(split);
            } else {
                // we don't store the split, but will re-create stream split later
                isStreamSplitAssigned = false;
            }
        }
        snapshotSplitAssigner.addSplits(snapshotSplits);
    }

    /**
     * Snapshots the state of this assigner: the snapshot assigner's own state plus the
     * stream-split-assigned flag.
     *
     * @param checkpointId id of the checkpoint being taken
     * @return the combined hybrid state
     */
    @Override
    public PendingSplitsState snapshotState(long checkpointId) {
        return new HybridPendingSplitsState(
                snapshotSplitAssigner.snapshotState(checkpointId), isStreamSplitAssigned);
    }

    /** Forwards the checkpoint-complete notification to the snapshot split assigner. */
    @Override
    public void notifyCheckpointComplete(long checkpointId) {
        snapshotSplitAssigner.notifyCheckpointComplete(checkpointId);
    }

    /** Returns whether the stream split is currently marked as assigned. */
    @Override
    public boolean isStreamSplitAssigned() {
        return isStreamSplitAssigned;
    }

    /** Closes the underlying snapshot split assigner. */
    @Override
    public void close() {
        snapshotSplitAssigner.close();
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Builds the stream split from the snapshot splits assigned so far.
     *
     * <p>The assigned snapshot splits are visited in split-id order. The starting offset of the
     * stream split is the smallest finished offset among them, or the factory's initial offset
     * when no offset was found; the ending offset is the factory's no-stopping offset.
     *
     * @return a new stream split with id {@code "stream-split"}
     */
    public StreamSplit createStreamSplit() {
        final List<SchemalessSnapshotSplit> assignedSnapshotSplit =
                snapshotSplitAssigner.getAssignedSplits().values().stream()
                        .sorted(Comparator.comparing(SourceSplitBase::splitId))
                        .collect(Collectors.toList());

        Map<String, Offset> splitFinishedOffsets = snapshotSplitAssigner.getSplitFinishedOffsets();
        final List<FinishedSnapshotSplitInfo> finishedSnapshotSplitInfos = new ArrayList<>();

        Offset minOffset = null;
        for (SchemalessSnapshotSplit split : assignedSnapshotSplit) {
            // find the min offset of change log
            // NOTE(review): assumes every assigned split has a finished offset recorded; a
            // missing entry after a non-null minimum would fail on isBefore() - confirm.
            Offset changeLogOffset = splitFinishedOffsets.get(split.splitId());
            if (minOffset == null || changeLogOffset.isBefore(minOffset)) {
                minOffset = changeLogOffset;
            }
            finishedSnapshotSplitInfos.add(
                    new FinishedSnapshotSplitInfo(
                            split.getTableId(),
                            split.splitId(),
                            split.getSplitStart(),
                            split.getSplitEnd(),
                            changeLogOffset,
                            offsetFactory));
        }

        // the finishedSnapshotSplitInfos is too large for transmission, divide it to groups and
        // then transfer them
        boolean divideMetaToGroups = finishedSnapshotSplitInfos.size() > splitMetaGroupSize;
        return new StreamSplit(
                STREAM_SPLIT_ID,
                minOffset == null ? offsetFactory.createInitialOffset() : minOffset,
                offsetFactory.createNoStoppingOffset(),
                // when divided into groups the infos are not embedded; only their count is sent
                divideMetaToGroups ? new ArrayList<>() : finishedSnapshotSplitInfos,
                // starts empty; presumably table schemas - confirm against StreamSplit
                new HashMap<>(),
                finishedSnapshotSplitInfos.size());
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy