package com.snowflake.kafka.connector.internal.streaming;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_CONFIG;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_DEFAULT;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.internal.streaming.StreamingUtils.DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY;
import static com.snowflake.kafka.connector.internal.streaming.StreamingUtils.MAX_GET_OFFSET_TOKEN_RETRIES;
import static java.time.temporal.ChronoUnit.SECONDS;
import static org.apache.kafka.common.record.TimestampType.NO_TIMESTAMP_TYPE;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.BufferThreshold;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.PartitionBuffer;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.records.RecordService;
import com.snowflake.kafka.connector.records.SnowflakeJsonSchema;
import com.snowflake.kafka.connector.records.SnowflakeRecordContent;
import dev.failsafe.Failsafe;
import dev.failsafe.Fallback;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.core.JsonProcessingException;
import net.snowflake.ingest.streaming.InsertValidationResponse;
import net.snowflake.ingest.streaming.OpenChannelRequest;
import net.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import net.snowflake.ingest.utils.Pair;
import net.snowflake.ingest.utils.SFException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;
/**
* This is a wrapper on top of Streaming Ingest Channel which is responsible for ingesting rows to
* Snowflake.
*
* <p>There is a one to one relation between partition and channel.
*
* <p>The number of TopicPartitionChannel objects can scale in proportion to the number of
* partitions of a topic.
*
* <p>Whenever a new instance is created, the cache (Map) in SnowflakeSinkService is also replaced,
* and we will reload the offsets from Snowflake and reset the consumer offset in Kafka.
*
* <p>During rebalance, we would lose this state and hence there is a need to invoke
* getLatestOffsetToken from Snowflake.
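*
* <p>A minimal construction sketch using the testing-oriented constructor declared in this class
* (all variable names are illustrative placeholders):
*
* <pre>{@code
* TopicPartition tp = new TopicPartition("myTopic", 0);
* TopicPartitionChannel channel =
*     new TopicPartitionChannel(
*         streamingIngestClient, // SnowflakeStreamingIngestClient shared by this task
*         tp,
*         "myTopic_0",           // channel name (format V1)
*         "MY_TABLE",            // destination table
*         bufferThreshold,       // BufferThreshold: time, bytes and record-count limits
*         connectorConfig,       // Map<String, String> of connector properties
*         errorReporter,         // KafkaRecordErrorReporter used for the DLQ
*         sinkTaskContext,
*         connectionService,     // SnowflakeConnectionService
*         telemetryService);     // SnowflakeTelemetryService
* }</pre>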
*/
public class TopicPartitionChannel {
private static final KCLogger LOGGER = new KCLogger(TopicPartitionChannel.class.getName());
public static final long NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE = -1L;
// last time we invoked insertRows API
private long previousFlushTimeStampMs;
/* Buffer to hold JSON converted incoming SinkRecords */
private StreamingBuffer streamingBuffer;
private final Lock bufferLock = new ReentrantLock(true);
// used to communicate to the streaming ingest's insertRows API
// This is non final because we might decide to get the new instance of Channel
private SnowflakeStreamingIngestChannel channel;
// -------- private final fields -------- //
// This offset represents the data persisted in Snowflake. More specifically it is the Snowflake
// offset determined from the insertRows API call. It is set after calling the fetchOffsetToken
// API for this channel
private final AtomicLong offsetPersistedInSnowflake =
new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);
// This offset represents the data buffered in KC. More specifically it is the KC offset to ensure
// exactly once functionality. On the creation it is set to the latest committed token in
// Snowflake (see offsetPersistedInSnowflake) and updated on each new row from KC.
private final AtomicLong processedOffset =
new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);
// This offset is a fallback to represent the data buffered in KC. It is similar to
// processedOffset, however it is only used to resend the offset when the channel offset token is
// NULL. It is updated to the first offset sent by KC (see processedOffset) or the offset
// persisted in Snowflake (see offsetPersistedInSnowflake)
private final AtomicLong latestConsumerOffset =
new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);
// Indicates whether we need to skip and discard any leftover rows in the current batch; this
// could happen when the channel gets invalidated and reset, in which case anything left in the
// buffer should be skipped
private boolean needToSkipCurrentBatch = false;
private final SnowflakeStreamingIngestClient streamingIngestClient;
// Topic partition Object from connect consisting of topic and partition
private final TopicPartition topicPartition;
/* Channel Name is computed from topic and partition */
private final String channelNameFormatV1;
/* table is required for opening the channel */
private final String tableName;
/* Error handling, DB, schema, Snowflake URL and other snowflake specific connector properties are defined here. */
private final Map<String, String> sfConnectorConfig;
/* Responsible for converting records to Json */
private final RecordService recordService;
/* Responsible for returning errors to DLQ if records have failed to be ingested. */
private final KafkaRecordErrorReporter kafkaRecordErrorReporter;
/**
* Available from {@link org.apache.kafka.connect.sink.SinkTask} which has access to various
* utility methods.
*/
private final SinkTaskContext sinkTaskContext;
/* Error related properties */
// If set to true, we will send records to DLQ provided DLQ name is valid.
private final boolean errorTolerance;
// Whether to log errors to log file.
private final boolean logErrors;
// Set to false if DLQ topic is null or empty. True if it is a valid string in config
private final boolean isDLQTopicSet;
// Used to identify when to flush (Time, bytes or number of records)
private final BufferThreshold streamingBufferThreshold;
// Whether schematization has been enabled.
private final boolean enableSchematization;
// Whether schema evolution could be done on this channel
private final boolean enableSchemaEvolution;
// Reference to the Snowflake connection service
private final SnowflakeConnectionService conn;
private final SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus;
/**
* Used to send telemetry to Snowflake. Currently, the TelemetryClient is created from a Snowflake
* Connection Object, i.e. not a session-less client.
*/
private final SnowflakeTelemetryService telemetryServiceV2;
/** Testing only, initialize TopicPartitionChannel without the connection service */
@VisibleForTesting
public TopicPartitionChannel(
SnowflakeStreamingIngestClient streamingIngestClient,
TopicPartition topicPartition,
final String channelNameFormatV1,
final String tableName,
final BufferThreshold streamingBufferThreshold,
final Map<String, String> sfConnectorConfig,
KafkaRecordErrorReporter kafkaRecordErrorReporter,
SinkTaskContext sinkTaskContext,
SnowflakeConnectionService conn,
SnowflakeTelemetryService telemetryService) {
this(
streamingIngestClient,
topicPartition,
channelNameFormatV1,
tableName,
false, /* No schema evolution permission */
streamingBufferThreshold,
sfConnectorConfig,
kafkaRecordErrorReporter,
sinkTaskContext,
conn,
new RecordService(telemetryService),
telemetryService,
false,
null);
}
/**
* @param streamingIngestClient client created specifically for this task
* @param topicPartition topic partition corresponding to this Streaming Channel
* (TopicPartitionChannel)
* @param channelNameFormatV1 channel Name which is deterministic for topic and partition
* @param tableName table to ingest in snowflake
* @param hasSchemaEvolutionPermission if the role has permission to perform schema evolution on
* the table
* @param streamingBufferThreshold bytes, count of records and flush time thresholds.
* @param sfConnectorConfig configuration set for snowflake connector
* @param kafkaRecordErrorReporter kafka error reporter for sending records to DLQ
* @param sinkTaskContext context on Kafka Connect's runtime
* @param conn the snowflake connection service
* @param recordService record service for processing incoming offsets from Kafka
* @param telemetryService Telemetry Service which includes the Telemetry Client, sends Json data
* to Snowflake
*/
public TopicPartitionChannel(
SnowflakeStreamingIngestClient streamingIngestClient,
TopicPartition topicPartition,
final String channelNameFormatV1,
final String tableName,
boolean hasSchemaEvolutionPermission,
final BufferThreshold streamingBufferThreshold,
final Map<String, String> sfConnectorConfig,
KafkaRecordErrorReporter kafkaRecordErrorReporter,
SinkTaskContext sinkTaskContext,
SnowflakeConnectionService conn,
RecordService recordService,
SnowflakeTelemetryService telemetryService,
boolean enableCustomJMXMonitoring,
MetricsJmxReporter metricsJmxReporter) {
final long startTime = System.currentTimeMillis();
this.streamingIngestClient = Preconditions.checkNotNull(streamingIngestClient);
Preconditions.checkState(!streamingIngestClient.isClosed());
this.topicPartition = Preconditions.checkNotNull(topicPartition);
this.channelNameFormatV1 = Preconditions.checkNotNull(channelNameFormatV1);
this.tableName = Preconditions.checkNotNull(tableName);
this.streamingBufferThreshold = Preconditions.checkNotNull(streamingBufferThreshold);
this.sfConnectorConfig = Preconditions.checkNotNull(sfConnectorConfig);
this.kafkaRecordErrorReporter = Preconditions.checkNotNull(kafkaRecordErrorReporter);
this.sinkTaskContext = Preconditions.checkNotNull(sinkTaskContext);
this.conn = conn;
this.recordService = recordService;
this.telemetryServiceV2 = Preconditions.checkNotNull(telemetryService);
this.previousFlushTimeStampMs = System.currentTimeMillis();
this.streamingBuffer = new StreamingBuffer();
/* Error properties */
this.errorTolerance = StreamingUtils.tolerateErrors(this.sfConnectorConfig);
this.logErrors = StreamingUtils.logErrors(this.sfConnectorConfig);
this.isDLQTopicSet =
!Strings.isNullOrEmpty(StreamingUtils.getDlqTopicName(this.sfConnectorConfig));
/* Schematization related properties */
this.enableSchematization =
this.recordService.setAndGetEnableSchematizationFromConfig(sfConnectorConfig);
this.enableSchemaEvolution = this.enableSchematization && hasSchemaEvolutionPermission;
if (isEnableChannelOffsetMigration(sfConnectorConfig)) {
/* Channel Name format V2 is computed from connector name, topic and partition */
final String channelNameFormatV2 =
generateChannelNameFormatV2(this.channelNameFormatV1, this.conn.getConnectorName());
conn.migrateStreamingChannelOffsetToken(
this.tableName, channelNameFormatV2, this.channelNameFormatV1);
}
// Open channel and reset the offset in kafka
this.channel = Preconditions.checkNotNull(openChannelForTable());
final long lastCommittedOffsetToken = fetchOffsetTokenWithRetry();
this.offsetPersistedInSnowflake.set(lastCommittedOffsetToken);
this.processedOffset.set(lastCommittedOffsetToken);
// setup telemetry and metrics
String connectorName =
conn == null || conn.getConnectorName() == null || conn.getConnectorName().isEmpty()
? "default_connector_name"
: conn.getConnectorName();
this.snowflakeTelemetryChannelStatus =
new SnowflakeTelemetryChannelStatus(
tableName,
connectorName,
channelNameFormatV1,
startTime,
enableCustomJMXMonitoring,
metricsJmxReporter,
this.offsetPersistedInSnowflake,
this.processedOffset,
this.latestConsumerOffset);
this.telemetryServiceV2.reportKafkaPartitionStart(
new SnowflakeTelemetryChannelCreation(this.tableName, this.channelNameFormatV1, startTime));
if (lastCommittedOffsetToken != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
this.sinkTaskContext.offset(this.topicPartition, lastCommittedOffsetToken + 1L);
} else {
LOGGER.info(
"TopicPartitionChannel:{}, offset token is NULL, will rely on Kafka to send us the"
+ " correct offset instead",
this.getChannelNameFormatV1());
}
}
/**
* Checks if the configuration provided in Snowflake Kafka Connect has set {@link
* SnowflakeSinkConnectorConfig#ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_CONFIG} to any value. If not
* set, it fetches the default value.
*
* <p>If the returned value is false, the system function for channel offset migration will not be
* called and the channel name will use the V1 format.
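*
* <p>A minimal sketch of how the flag might appear in the connector config map (the value shown is
* hypothetical; the key is the statically imported constant used below):
*
* <pre>{@code
* Map<String, String> config = new HashMap<>();
* config.put(ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_CONFIG, "false"); // migration will be skipped
* }</pre>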
*
* @param sfConnectorConfig customer provided json config
* @return true if enabled, false otherwise
*/
private boolean isEnableChannelOffsetMigration(Map<String, String> sfConnectorConfig) {
boolean isEnableChannelOffsetMigration =
Boolean.parseBoolean(
sfConnectorConfig.getOrDefault(
SnowflakeSinkConnectorConfig.ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_CONFIG,
Boolean.toString(ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_DEFAULT)));
if (!isEnableChannelOffsetMigration) {
LOGGER.info(
"Config:{} is disabled for connector:{}",
ENABLE_CHANNEL_OFFSET_TOKEN_MIGRATION_CONFIG,
conn.getConnectorName());
}
return isEnableChannelOffsetMigration;
}
/**
* Generates the new (V2) channel name format, which prefixes the connector name to the old (V1)
* format. Please note, we will not open a channel with the new format; we only run a migration
* function from the new channel format to the old channel format and then drop the channel that
* uses the new format.
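*
* <p>For illustration (hypothetical values): {@code generateChannelNameFormatV2("myTopic_0",
* "my_connector")} returns {@code "my_connector_myTopic_0"}.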
*
* @param channelNameFormatV1 Original format used.
* @param connectorName connector name used in SF config JSON.
* @return new channel name introduced as part of the channel offset token migration change
*     (released in version 2.1.0)
*/
@VisibleForTesting
public static String generateChannelNameFormatV2(
String channelNameFormatV1, String connectorName) {
return connectorName + "_" + channelNameFormatV1;
}
/**
* Inserts the record into buffer
*
* <p>Step 1: Initializes this channel by fetching the offsetToken from Snowflake for the first
* time this channel/partition has received offset after start/restart.
*
* <p>Step 2: Decides whether given offset from Kafka needs to be processed and whether it
* qualifies for being added into buffer.
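*
* <p>Sketch of the gating rule applied below (offset values are hypothetical): a record is
* buffered when {@code currentProcessedOffset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE ||
* incomingOffset >= currentProcessedOffset + 1}; e.g. with a processed offset of 41, offset 42 is
* buffered while offset 40 is skipped.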
*
* @param kafkaSinkRecord input record from Kafka
* @param isFirstRowPerPartitionInBatch indicates whether the given record is the first record per
* partition in a batch
*/
public void insertRecordToBuffer(
SinkRecord kafkaSinkRecord, boolean isFirstRowPerPartitionInBatch) {
final long currentOffsetPersistedInSnowflake = this.offsetPersistedInSnowflake.get();
final long currentProcessedOffset = this.processedOffset.get();
// Set the consumer offset to be the first record that Kafka sends us
if (latestConsumerOffset.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
this.latestConsumerOffset.set(kafkaSinkRecord.kafkaOffset());
}
// Reset the value if it's a new batch
if (isFirstRowPerPartitionInBatch) {
needToSkipCurrentBatch = false;
}
// Simply skip inserting into the buffer if the row should be ignored after channel reset
if (needToSkipCurrentBatch) {
LOGGER.info(
"Ignore adding offset:{} to buffer for channel:{} because we recently reset offset in"
+ " Kafka. currentProcessedOffset:{}",
kafkaSinkRecord.kafkaOffset(),
this.getChannelNameFormatV1(),
currentProcessedOffset);
return;
}
// Accept the incoming record only if we don't have a valid offset token at server side, or the
// incoming record offset is at least the processed offset + 1
if (currentProcessedOffset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
|| kafkaSinkRecord.kafkaOffset() >= currentProcessedOffset + 1) {
StreamingBuffer copiedStreamingBuffer = null;
bufferLock.lock();
try {
this.streamingBuffer.insert(kafkaSinkRecord);
this.processedOffset.set(kafkaSinkRecord.kafkaOffset());
// # of records or size based flushing
if (this.streamingBufferThreshold.shouldFlushOnBufferByteSize(
streamingBuffer.getBufferSizeBytes())
|| this.streamingBufferThreshold.shouldFlushOnBufferRecordCount(
streamingBuffer.getNumOfRecords())) {
copiedStreamingBuffer = streamingBuffer;
this.streamingBuffer = new StreamingBuffer();
LOGGER.debug(
"Flush based on buffered bytes or buffered number of records for"
+ " channel:{},currentBufferSizeInBytes:{}, currentBufferedRecordCount:{},"
+ " connectorBufferThresholds:{}",
this.getChannelNameFormatV1(),
copiedStreamingBuffer.getBufferSizeBytes(),
copiedStreamingBuffer.getSinkRecords().size(),
this.streamingBufferThreshold);
}
} finally {
bufferLock.unlock();
}
// If the buffer size threshold or the record count threshold was reached, we will immediately
// flush (insert the copied buffer)
if (copiedStreamingBuffer != null) {
insertBufferedRecords(copiedStreamingBuffer);
}
} else {
LOGGER.debug(
"Skip adding offset:{} to buffer for channel:{} because"
+ " offsetPersistedInSnowflake:{}, processedOffset:{}",
kafkaSinkRecord.kafkaOffset(),
this.getChannelNameFormatV1(),
currentOffsetPersistedInSnowflake,
currentProcessedOffset);
}
}
private boolean shouldConvertContent(final Object content) {
return content != null && !(content instanceof SnowflakeRecordContent);
}
/**
* This would always return false for streaming ingest use case since isBroken field is never set.
* isBroken is set only when using Custom snowflake converters and the content was not json
* serializable.
*
* <p>For Community converters, the kafka record will not be sent to Kafka connector if the record
* is not serializable.
*/
private boolean isRecordBroken(final SinkRecord record) {
return isContentBroken(record.value()) || isContentBroken(record.key());
}
private boolean isContentBroken(final Object content) {
return content != null && ((SnowflakeRecordContent) content).isBroken();
}
private SinkRecord handleNativeRecord(SinkRecord record, boolean isKey) {
SnowflakeRecordContent newSFContent;
Schema schema = isKey ? record.keySchema() : record.valueSchema();
Object content = isKey ? record.key() : record.value();
try {
newSFContent = new SnowflakeRecordContent(schema, content, true);
} catch (Exception e) {
LOGGER.error("Native content parser error:\n{}", e.getMessage());
try {
// try to serialize this object and send that as broken record
ByteArrayOutputStream out = new ByteArrayOutputStream();
ObjectOutputStream os = new ObjectOutputStream(out);
os.writeObject(content);
newSFContent = new SnowflakeRecordContent(out.toByteArray());
} catch (Exception serializeError) {
LOGGER.error(
"Failed to convert broken native record to byte data:\n{}",
serializeError.getMessage());
throw e;
}
}
// create new sinkRecord
Schema keySchema = isKey ? new SnowflakeJsonSchema() : record.keySchema();
Object keyContent = isKey ? newSFContent : record.key();
Schema valueSchema = isKey ? record.valueSchema() : new SnowflakeJsonSchema();
Object valueContent = isKey ? record.value() : newSFContent;
return new SinkRecord(
record.topic(),
record.kafkaPartition(),
keySchema,
keyContent,
valueSchema,
valueContent,
record.kafkaOffset(),
record.timestamp(),
record.timestampType(),
record.headers());
}
// --------------- BUFFER FLUSHING LOGIC --------------- //
/**
* If difference between current time and previous flush time is more than threshold, insert the
* buffered Rows.
*
* <p>Note: We acquire buffer lock since we copy the buffer.
*
* <p>Threshold is config parameter: {@link
* com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig#BUFFER_FLUSH_TIME_SEC}
*
* <p>Previous flush time here means the last time we called the insertRows API with rows present
* in the buffer.
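*
* <p>Illustrative check (the threshold comes from config, so the exact value is deployment
* specific): a flush is triggered roughly when {@code System.currentTimeMillis() -
* previousFlushTimeStampMs >= flushTimeThresholdSeconds * 1000}.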
*/
protected void insertBufferedRecordsIfFlushTimeThresholdReached() {
if (this.streamingBufferThreshold.shouldFlushOnBufferTime(this.previousFlushTimeStampMs)) {
LOGGER.debug(
"Time based flush for channel:{}, CurrentTimeMs:{}, previousFlushTimeMs:{},"
+ " bufferThresholdSeconds:{}",
this.getChannelNameFormatV1(),
System.currentTimeMillis(),
this.previousFlushTimeStampMs,
this.streamingBufferThreshold.getFlushTimeThresholdSeconds());
StreamingBuffer copiedStreamingBuffer;
bufferLock.lock();
try {
copiedStreamingBuffer = this.streamingBuffer;
this.streamingBuffer = new StreamingBuffer();
} finally {
bufferLock.unlock();
}
if (copiedStreamingBuffer != null) {
insertBufferedRecords(copiedStreamingBuffer);
}
}
}
/**
* Invokes insertRows API using the provided offsets which were initially buffered for this
* partition. This buffer is decided based on the flush time threshold, buffered bytes or number
* of records
*/
InsertRowsResponse insertBufferedRecords(StreamingBuffer streamingBufferToInsert) {
// intermediate buffer can be empty here if time interval reached but kafka produced no records.
if (streamingBufferToInsert.isEmpty()) {
LOGGER.debug("No Rows Buffered for channel:{}, returning", this.getChannelNameFormatV1());
this.previousFlushTimeStampMs = System.currentTimeMillis();
return null;
}
InsertRowsResponse response = null;
try {
response = insertRowsWithFallback(streamingBufferToInsert);
// Updates the flush time (last time we called insertRows API)
this.previousFlushTimeStampMs = System.currentTimeMillis();
LOGGER.info(
"Successfully called insertRows for channel:{}, buffer:{}, insertResponseHasErrors:{},"
+ " needToResetOffset:{}",
this.getChannelNameFormatV1(),
streamingBufferToInsert,
response.hasErrors(),
response.needToResetOffset());
if (response.hasErrors()) {
handleInsertRowsFailures(
response.getInsertErrors(), streamingBufferToInsert.getSinkRecords());
}
// Due to schema evolution, we may need to reopen the channel and reset the offset in kafka
// since it's possible that not all rows are ingested
if (response.needToResetOffset()) {
streamingApiFallbackSupplier(
StreamingApiFallbackInvoker.INSERT_ROWS_SCHEMA_EVOLUTION_FALLBACK);
}
return response;
} catch (TopicPartitionChannelInsertionException ex) {
// Suppressing the exception because other channels might still continue to ingest
LOGGER.warn(
String.format(
"[INSERT_BUFFERED_RECORDS] Failure inserting buffer:%s for channel:%s",
streamingBufferToInsert, this.getChannelNameFormatV1()),
ex);
}
return response;
}
/**
* Uses {@link Fallback} API to reopen the channel if insertRows throws {@link SFException}.
*
* <p>We have deliberately not performed retries on insertRows because it might slow down overall
* ingestion and introduce lags in committing offsets to Kafka.
*
* <p>Note that insertRows API does perform channel validation which might throw SFException if
* channel is invalidated.
*
* <p>It can also send errors {@link
* net.snowflake.ingest.streaming.InsertValidationResponse.InsertError} in form of response inside
* {@link InsertValidationResponse}
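*
* <p>A minimal sketch of the Failsafe fallback pattern used here (helper names such as
* {@code reopenChannel} and {@code insertRowsToChannel} are hypothetical placeholders, not the
* actual implementation):
*
* <pre>{@code
* Fallback<Object> reopenChannelOnSfException =
*     Fallback.builder(event -> reopenChannel(event.getLastException()))
*         .handle(SFException.class)
*         .build();
* Failsafe.with(reopenChannelOnSfException).get(() -> insertRowsToChannel(buffer));
* }</pre>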
*
* @param buffer buffer to insert into snowflake
* @return InsertRowsResponse a response that wraps around InsertValidationResponse
*/
private InsertRowsResponse insertRowsWithFallback(StreamingBuffer buffer) {
Fallback