
package com.snowflake.kafka.connector.internal;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_RECORD_COUNT;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_SIZE_BYTES;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_SUB_DOMAIN;
import static org.apache.kafka.common.record.TimestampType.NO_TIMESTAMP_TYPE;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryPipeCreation;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryPipeStatus;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.records.RecordService;
import com.snowflake.kafka.connector.records.SnowflakeJsonSchema;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import com.snowflake.kafka.connector.records.SnowflakeRecordContent;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
/**
* This is the per-task service. A task can be assigned multiple partitions. The major methods are
* startPartition(s), insert, getOffset and close.
*
* <p>startPartition: called when partitions are assigned. Responsible for creating the per-pipe
* POJOs (ServiceContext).
*
* <p>insert and getOffset are called when the {@link
* com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} and {@link
* com.snowflake.kafka.connector.SnowflakeSinkTask#preCommit(Map)} APIs are called.
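*
* <p>A rough call-order sketch (illustrative only; the actual wiring lives in
* com.snowflake.kafka.connector.SnowflakeSinkTask, so the construction step below is an
* assumption rather than a copy of that code):
*
* <pre>{@code
* SnowflakeSinkService service = ...; // built from a SnowflakeConnectionService
* TopicPartition tp = new TopicPartition("my_topic", 0);
* service.startPartition("MY_TABLE", tp);        // open(): register the partition; pipe objects
*                                                //         are created lazily on first insert
* service.insert(records);                       // put(): buffer records, flush to internal stage
* long offset = service.getOffset(tp);           // preCommit(): offset safe to commit to Kafka
* service.close(Collections.singletonList(tp));  // close(): stop cleaner threads for the partition
* }</pre>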
*/
class SnowflakeSinkServiceV1 implements SnowflakeSinkService {
private final KCLogger LOGGER = new KCLogger(SnowflakeSinkServiceV1.class.getName());
private static final long ONE_HOUR = 60 * 60 * 1000L;
private static final long TEN_MINUTES = 10 * 60 * 1000L;
protected static final long CLEAN_TIME = 60 * 1000L; // one minute
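// Flush thresholds: a partition buffer is written to the internal stage when its size reaches
// fileSize bytes, when its record count reaches recordNum, or when flushTime seconds have elapsed
// since the previous flush (the time-based check runs in insert(Collection)).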
// Set in config (Time based flush) in seconds
private long flushTime;
// Set in config (buffer size based flush) in bytes
private long fileSize;
// Set in config (Threshold before we send the buffer to internal stage) corresponds to # of
// records in kafka
private long recordNum;
private final SnowflakeConnectionService conn;
private final Map<String, ServiceContext> pipes;
private final RecordService recordService;
private boolean isStopped;
private final SnowflakeTelemetryService telemetryService;
private Map<String, String> topic2TableMap;
// Behavior to be set at the start of connector start. (For tombstone records)
private SnowflakeSinkConnectorConfig.BehaviorOnNullValues behaviorOnNullValues;
// default is true unless the configuration provided is false;
// If this is true, we will enable Mbean for required classes and emit JMX metrics for monitoring
private boolean enableCustomJMXMonitoring = SnowflakeSinkConnectorConfig.JMX_OPT_DEFAULT;
SnowflakeSinkServiceV1(SnowflakeConnectionService conn) {
if (conn == null || conn.isClosed()) {
throw SnowflakeErrors.ERROR_5010.getException();
}
this.fileSize = SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT;
this.recordNum = SnowflakeSinkConnectorConfig.BUFFER_COUNT_RECORDS_DEFAULT;
this.flushTime = SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_DEFAULT;
this.pipes = new HashMap<>();
this.conn = conn;
isStopped = false;
this.telemetryService = conn.getTelemetryClient();
this.recordService = new RecordService(this.telemetryService);
this.topic2TableMap = new HashMap<>();
// Setting the default value in constructor
// meaning it will not ignore the null values (Tombstone records won't be ignored/filtered)
this.behaviorOnNullValues = SnowflakeSinkConnectorConfig.BehaviorOnNullValues.DEFAULT;
}
/**
* Create new ingestion task from existing table and stage, tries to reuse existing pipe and
* recover previous task, otherwise, create a new pipe.
*
* @param tableName destination table name in Snowflake
* @param topicPartition TopicPartition passed from Kafka
*/
@Override
public void startPartition(final String tableName, final TopicPartition topicPartition) {
String stageName = Utils.stageName(conn.getConnectorName(), tableName);
String nameIndex = getNameIndex(topicPartition.topic(), topicPartition.partition());
if (pipes.containsKey(nameIndex)) {
LOGGER.warn("task is already registered with {} partition", nameIndex);
} else {
String pipeName =
Utils.pipeName(conn.getConnectorName(), tableName, topicPartition.partition());
pipes.put(
nameIndex,
new ServiceContext(tableName, stageName, pipeName, conn, topicPartition.partition()));
}
}
@Override
public void startPartitions(
Collection<TopicPartition> partitions, Map<String, String> topic2Table) {
partitions.forEach(tp -> this.startPartition(Utils.tableName(tp.topic(), topic2Table), tp));
}
@Override
public void insert(final Collection<SinkRecord> records) {
// note that records can be empty
for (SinkRecord record : records) {
// check if need to handle null value records
if (recordService.shouldSkipNullValue(record, behaviorOnNullValues)) {
continue;
}
// May trigger a record-count based flush
insert(record);
}
// check all sink context to see if they need to be flushed
for (ServiceContext pipe : pipes.values()) {
// Time based flushing
if (pipe.shouldFlush()) {
pipe.flushBuffer();
}
}
}
@Override
public void insert(SinkRecord record) {
String nameIndex = getNameIndex(record.topic(), record.kafkaPartition());
// init a new topic partition
if (!pipes.containsKey(nameIndex)) {
LOGGER.warn(
"Topic: {} Partition: {} hasn't been initialized by OPEN " + "function",
record.topic(),
record.kafkaPartition());
startPartition(
Utils.tableName(record.topic(), this.topic2TableMap),
new TopicPartition(record.topic(), record.kafkaPartition()));
}
pipes.get(nameIndex).insert(record);
}
@Override
public long getOffset(final TopicPartition topicPartition) {
String name = getNameIndex(topicPartition.topic(), topicPartition.partition());
if (pipes.containsKey(name)) {
return pipes.get(name).getOffset();
} else {
LOGGER.warn(
"Topic: {} Partition: {} hasn't been initialized to get offset",
topicPartition.topic(),
topicPartition.partition());
return 0;
}
}
@Override
public int getPartitionCount() {
return pipes.size();
}
// used for testing only
@Override
public void callAllGetOffset() {
for (ServiceContext pipe : pipes.values()) {
pipe.getOffset();
}
}
@Override
public void close(Collection<TopicPartition> partitions) {
partitions.forEach(
tp -> {
String name = getNameIndex(tp.topic(), tp.partition());
ServiceContext sc = pipes.remove(name);
if (sc != null) {
try {
sc.close();
} catch (Exception e) {
LOGGER.error(
"Failed to close sink service for Topic: {}, Partition: " + "{}\nMessage:{}",
tp.topic(),
tp.partition(),
e.getMessage());
} finally {
sc.unregisterPipeJMXMetrics();
}
} else {
LOGGER.warn(
"Failed to close sink service for Topic: {}, Partition: {}, "
+ "sink service hasn't been initialized",
tp.topic(),
tp.partition());
}
});
}
@Override
public void closeAll() {
this.isStopped = true; // release all cleaner and flusher threads
pipes.forEach(
(name, context) -> {
context.close();
context.unregisterPipeJMXMetrics();
});
pipes.clear();
}
@Override
public void setIsStoppedToTrue() {
this.isStopped = true; // release all cleaner and flusher threads
}
@Override
public boolean isClosed() {
return this.isStopped;
}
@Override
public void setRecordNumber(final long num) {
if (num < 0) {
LOGGER.error("number of record in each file is {}, it is negative, reset to" + " 0");
this.recordNum = 0;
} else {
this.recordNum = num;
LOGGER.info("set number of record limitation to {}", num);
}
}
@Override
public void setFileSize(final long size) {
if (size < SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_MIN) {
LOGGER.error(
"file size is {} bytes, which is smaller than the minimum allowed; reset to the default file"
+ " size {} bytes",
size,
SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT);
this.fileSize = SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT;
} else {
this.fileSize = size;
LOGGER.info("set file size limitation to {} bytes", size);
}
}
@Override
public void setFlushTime(final long time) {
if (time < SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN) {
LOGGER.error(
"flush time is {} seconds, it is smaller than the minimum "
+ "flush time {} seconds, reset to the minimum flush time",
time,
SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN);
this.flushTime = SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN;
} else {
this.flushTime = time;
LOGGER.info("set flush time to {} seconds", time);
}
}
@Override
public void setTopic2TableMap(Map<String, String> topic2TableMap) {
this.topic2TableMap = topic2TableMap;
}
@Override
public void setMetadataConfig(SnowflakeMetadataConfig configMap) {
this.recordService.setMetadataConfig(configMap);
}
@Override
public long getRecordNumber() {
return this.recordNum;
}
@Override
public long getFlushTime() {
return this.flushTime;
}
@Override
public long getFileSize() {
return this.fileSize;
}
@Override
public void setBehaviorOnNullValuesConfig(
SnowflakeSinkConnectorConfig.BehaviorOnNullValues behavior) {
this.behaviorOnNullValues = behavior;
}
@Override
public void setCustomJMXMetrics(boolean enableJMX) {
this.enableCustomJMXMonitoring = enableJMX;
}
@Override
public SnowflakeSinkConnectorConfig.BehaviorOnNullValues getBehaviorOnNullValuesConfig() {
return this.behaviorOnNullValues;
}
/**
* Loop through all pipes in memory and find the metric registry instance for the given pipe. The
* pipes map is keyed by topic/partition name index, not by pipeName, hence the loop.
*
* @param pipeName pipe whose associated MetricRegistry to fetch
* @return Optional MetricRegistry. (Empty if pipe was not found in pipes map)
*/
@Override
public Optional<MetricRegistry> getMetricRegistry(final String pipeName) {
for (Map.Entry<String, ServiceContext> entry : this.pipes.entrySet()) {
if (entry.getValue().pipeName.equalsIgnoreCase(pipeName)) {
return Optional.of(entry.getValue().getMetricRegistry());
}
}
return Optional.empty();
}
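// e.g. getNameIndex("myTopic", 0) returns "myTopic_0"; this is the key used in the pipes map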
@VisibleForTesting
protected static String getNameIndex(String topic, int partition) {
return topic + "_" + partition;
}
private class ServiceContext {
private final String tableName;
private final String stageName;
private final String pipeName;
private final SnowflakeConnectionService conn;
private final SnowflakeIngestionService ingestionService;
private List<String> fileNames;
// Includes a list of files:
// 1. added after a successful flush to the internal stage
// 2. discovered by listing the internal stage when the app restarts, so that leaked files can
// be handled
private List<String> cleanerFileNames;
private SnowpipeBuffer buffer;
private final String prefix;
private final AtomicLong committedOffset; // loaded offset + 1
private final AtomicLong flushedOffset; // flushed offset (file on stage)
private final AtomicLong processedOffset; // processed offset
private long previousFlushTimeStamp;
// threads
private final ExecutorService cleanerExecutor;
private final ExecutorService reprocessCleanerExecutor;
private final Lock bufferLock;
private final Lock fileListLock;
// telemetry
private final SnowflakeTelemetryPipeStatus pipeStatus;
// non null
private final MetricRegistry metricRegistry;
// Wrapper on Metric registry instance which will hold all registered metrics for this pipe
private final MetricsJmxReporter metricsJmxReporter;
// buffer metrics, updated every time a buffer is flushed to the internal stage
private Histogram partitionBufferSizeBytesHistogram; // in Bytes
private Histogram partitionBufferCountHistogram;
// make the initialization lazy
private boolean hasInitialized = false;
private boolean forceCleanerFileReset = false;
private ServiceContext(
String tableName,
String stageName,
String pipeName,
SnowflakeConnectionService conn,
int partition) {
this.pipeName = pipeName;
this.tableName = tableName;
this.stageName = stageName;
this.conn = conn;
this.fileNames = new LinkedList<>();
this.cleanerFileNames = new LinkedList<>();
this.buffer = new SnowpipeBuffer();
this.ingestionService = conn.buildIngestService(stageName, pipeName);
this.prefix = FileNameUtils.filePrefix(conn.getConnectorName(), tableName, partition);
this.processedOffset = new AtomicLong(-1);
this.flushedOffset = new AtomicLong(-1);
this.committedOffset = new AtomicLong(0);
this.previousFlushTimeStamp = System.currentTimeMillis();
this.bufferLock = new ReentrantLock();
this.fileListLock = new ReentrantLock();
this.metricRegistry = new MetricRegistry();
this.metricsJmxReporter =
new MetricsJmxReporter(this.metricRegistry, conn.getConnectorName());
this.pipeStatus =
new SnowflakeTelemetryPipeStatus(
tableName, stageName, pipeName, enableCustomJMXMonitoring, this.metricsJmxReporter);
this.cleanerExecutor = Executors.newSingleThreadExecutor();
this.reprocessCleanerExecutor = Executors.newSingleThreadExecutor();
if (enableCustomJMXMonitoring) {
partitionBufferCountHistogram =
this.metricRegistry.histogram(
MetricsUtil.constructMetricName(pipeName, BUFFER_SUB_DOMAIN, BUFFER_RECORD_COUNT));
partitionBufferSizeBytesHistogram =
this.metricRegistry.histogram(
MetricsUtil.constructMetricName(pipeName, BUFFER_SUB_DOMAIN, BUFFER_SIZE_BYTES));
LOGGER.info(
"Registered {} metrics for pipeName:{}", metricRegistry.getMetrics().size(), pipeName);
}
LOGGER.info("pipe: {} - service started", pipeName);
}
private void init(long recordOffset) {
LOGGER.info("init pipe: {}", pipeName);
SnowflakeTelemetryPipeCreation pipeCreation =
new SnowflakeTelemetryPipeCreation(tableName, stageName, pipeName);
// wait for sinkConnector to start
createTableAndStage(pipeCreation);
// recover will only check pipe status and create pipe if it does not exist.
recover(pipeCreation);
try {
startCleaner(recordOffset, pipeCreation);
telemetryService.reportKafkaPartitionStart(pipeCreation);
} catch (Exception e) {
LOGGER.warn("Cleaner and Flusher threads shut down before initialization");
}
}
private boolean resetCleanerFiles() {
try {
LOGGER.warn("Resetting cleaner files {}", pipeName);
pipeStatus.incrementAndGetCleanerRestartCount();
// list stage again and try to clean the files leaked on stage
// this can throw unchecked, it needs to be wrapped in a try/catch
// if it fails again do not reset forceCleanerFileReset
List<String> tmpCleanerFileNames = conn.listStage(stageName, prefix);
fileListLock.lock();
try {
cleanerFileNames.addAll(tmpCleanerFileNames);
cleanerFileNames = cleanerFileNames.stream().distinct().collect(Collectors.toList());
} finally {
fileListLock.unlock();
}
forceCleanerFileReset = false;
LOGGER.warn("Resetting cleaner files {} done", pipeName);
} catch (Throwable t) {
LOGGER.warn("Cleaner file reset encountered an error:\n{}", t.getMessage());
}
return forceCleanerFileReset;
}
// If there are files already on stage, we need to decide whether we will reprocess the offsets
// or we will purge them.
private void startCleaner(long recordOffset, SnowflakeTelemetryPipeCreation pipeCreation) {
// When the cleaner starts, scan the stage for all files of this pipe.
// If we know that we are going to reprocess a file, then safely delete it.
List<String> currentFilesOnStage = conn.listStage(stageName, prefix);
List<String> reprocessFiles = new ArrayList<>();
filterFileReprocess(currentFilesOnStage, reprocessFiles, recordOffset);
// Telemetry
pipeCreation.setFileCountRestart(currentFilesOnStage.size());
pipeCreation.setFileCountReprocessPurge(reprocessFiles.size());
// Files left on stage must be on ingestion, otherwise offset won't be committed and
// the file will be removed by the reprocess filter.
pipeStatus.addAndGetFileCountOnIngestion(currentFilesOnStage.size());
pipeStatus.addAndGetFileCountOnStage(currentFilesOnStage.size());
fileListLock.lock();
try {
cleanerFileNames.addAll(currentFilesOnStage);
} finally {
fileListLock.unlock();
}
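// Background cleaner loop: every CLEAN_TIME ms it reports partition usage telemetry and then
// checks the ingest status of flushed files (checkStatus()), purging loaded files and moving
// failed ones to the table stage.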
cleanerExecutor.submit(
() -> {
LOGGER.info("pipe {}: cleaner started", pipeName);
while (!isStopped) {
try {
telemetryService.reportKafkaPartitionUsage(pipeStatus, false);
Thread.sleep(CLEAN_TIME);
if (forceCleanerFileReset && resetCleanerFiles()) {
continue;
}
checkStatus();
} catch (InterruptedException e) {
LOGGER.info("Cleaner terminated by an interrupt:\n{}", e.getMessage());
break;
} catch (Exception e) {
LOGGER.warn(
"Cleaner encountered an exception {}:\n{}\n{}",
e.getClass(),
e.getMessage(),
e.getStackTrace());
telemetryService.reportKafkaConnectFatalError(e.getMessage());
forceCleanerFileReset = true;
}
}
});
if (reprocessFiles.size() > 0) {
// After we start the cleaner thread, delay a while and start deleting files.
reprocessCleanerExecutor.submit(
() -> {
try {
Thread.sleep(CLEAN_TIME);
LOGGER.info(
"Purging files already present on the stage before start. ReprocessFileSize:{}",
reprocessFiles.size());
purge(reprocessFiles);
} catch (Exception e) {
LOGGER.error(
"Reprocess cleaner encountered an exception {}:\n{}\n{}",
e.getClass(),
e.getMessage(),
e.getStackTrace());
}
});
}
}
/**
* Does in-place manipulation of the passed currentFilesOnStage. The caller of this function
* passes in the list of file names on the stage (ls @stageName).
*
* <p>In return it expects the list of files (reprocessFiles), which is a subset of
* currentFilesOnStage.
*
* <p>How do we find the list of reprocessFiles?
*
* <p>1. Find the start offset of each file currently on the stage.
*
* <p>2. If the current offset passed by the connector is less than or equal to the start offset
* of a found file, we will reprocess that file's records and at the same time remove the file
* from currentFilesOnStage. (The idea being: Kafka will re-send those offsets, so the file can
* be purged rather than ingested.)
*
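* <p>Illustrative example (offsets only, file names elided): with recordOffset = 100 and files on
* stage whose start offsets are 40 and 120, the file starting at 120 is added to reprocessFiles
* (Kafka will re-send those records) and removed from currentFilesOnStage, while the file
* starting at 40 is kept, since its contents are assumed to still be in flight with Snowpipe.
*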
* @param currentFilesOnStage list of file names currently on the stage (result of ls @stageName)
* @param reprocessFiles empty list that this method fills in
* @param recordOffset current offset
*/
private void filterFileReprocess(
List<String> currentFilesOnStage, List<String> reprocessFiles, long recordOffset) {
// iterate over a copy since reprocess files get removed from it
new LinkedList<>(currentFilesOnStage)
.forEach(
name -> {
long fileStartOffset = FileNameUtils.fileNameToStartOffset(name);
// If the start offset of this file is greater than or equal to the offset of the record that
// is sent to the connector, all content of this file will be re-sent by Kafka and reprocessed.
// Thus this file can be deleted.
if (recordOffset <= fileStartOffset) {
reprocessFiles.add(name);
currentFilesOnStage.remove(name);
}
});
}
private void stopCleaner() {
cleanerExecutor.shutdownNow();
reprocessCleanerExecutor.shutdownNow();
LOGGER.info("pipe {}: cleaner terminated", pipeName);
}
private void insert(final SinkRecord record) {
// init pipe
if (!hasInitialized) {
// This is only called once, when the first offset arrives after the connector starts or
// rebalances
init(record.kafkaOffset());
metricsJmxReporter.start();
this.hasInitialized = true;
}
// only get offset token once when service context is initialized
// ignore already-ingested files
if (record.kafkaOffset() > processedOffset.get()) {
SinkRecord snowflakeRecord = record;
if (shouldConvertContent(snowflakeRecord.value())) {
snowflakeRecord = handleNativeRecord(snowflakeRecord, false);
}
if (shouldConvertContent(snowflakeRecord.key())) {
snowflakeRecord = handleNativeRecord(snowflakeRecord, true);
}
// broken record
if (isRecordBroken(snowflakeRecord)) {
writeBrokenDataToTableStage(snowflakeRecord);
// don't move committed offset in this case
// only move it in the normal cases
} else {
// lag telemetry, note that sink record timestamp might be null
if (snowflakeRecord.timestamp() != null
&& snowflakeRecord.timestampType() != NO_TIMESTAMP_TYPE) {
pipeStatus.updateKafkaLag(System.currentTimeMillis() - snowflakeRecord.timestamp());
}
SnowpipeBuffer tmpBuff = null;
bufferLock.lock();
try {
processedOffset.set(snowflakeRecord.kafkaOffset());
pipeStatus.setProcessedOffset(snowflakeRecord.kafkaOffset());
buffer.insert(snowflakeRecord);
if (buffer.getBufferSizeBytes() >= getFileSize()
|| (getRecordNumber() != 0 && buffer.getNumOfRecords() >= getRecordNumber())) {
tmpBuff = buffer;
this.buffer = new SnowpipeBuffer();
}
} finally {
bufferLock.unlock();
}
if (tmpBuff != null) {
flush(tmpBuff);
}
}
}
}
private boolean shouldConvertContent(final Object content) {
return content != null && !(content instanceof SnowflakeRecordContent);
}
private boolean isRecordBroken(final SinkRecord record) {
return isContentBroken(record.value()) || isContentBroken(record.key());
}
private boolean isContentBroken(final Object content) {
return content != null && ((SnowflakeRecordContent) content).isBroken();
}
private SinkRecord handleNativeRecord(SinkRecord record, boolean isKey) {
SnowflakeRecordContent newSFContent;
Schema schema = isKey ? record.keySchema() : record.valueSchema();
Object content = isKey ? record.key() : record.value();
try {
newSFContent = new SnowflakeRecordContent(schema, content, false);
} catch (Exception e) {
LOGGER.error("Native content parser error:\n{}", e.getMessage());
try {
// try to serialize this object and send that as broken record
ByteArrayOutputStream out = new ByteArrayOutputStream();
ObjectOutputStream os = new ObjectOutputStream(out);
os.writeObject(content);
newSFContent = new SnowflakeRecordContent(out.toByteArray());
} catch (Exception serializeError) {
LOGGER.error(
"Failed to convert broken native record to byte data:\n{}",
serializeError.getMessage());
throw e;
}
}
// create new sinkRecord
Schema keySchema = isKey ? new SnowflakeJsonSchema() : record.keySchema();
Object keyContent = isKey ? newSFContent : record.key();
Schema valueSchema = isKey ? record.valueSchema() : new SnowflakeJsonSchema();
Object valueContent = isKey ? record.value() : newSFContent;
return new SinkRecord(
record.topic(),
record.kafkaPartition(),
keySchema,
keyContent,
valueSchema,
valueContent,
record.kafkaOffset(),
record.timestamp(),
record.timestampType(),
record.headers());
}
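// Time-based flush: returns true once flushTime seconds have elapsed since the previous flush.
// It is only evaluated on each insert(Collection) call, so the actual flush may happen later.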
private boolean shouldFlush() {
return (System.currentTimeMillis() - this.previousFlushTimeStamp) >= (getFlushTime() * 1000);
}
private void flushBuffer() {
// Just checking buffer size, no atomic operation required
if (buffer.isEmpty()) {
return;
}
SnowpipeBuffer tmpBuff;
bufferLock.lock();
try {
tmpBuff = buffer;
this.buffer = new SnowpipeBuffer();
} finally {
bufferLock.unlock();
}
flush(tmpBuff);
}
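// Broken (unparseable) records are not dropped: their raw key/value bytes are PUT to the table
// stage so they can be inspected later, and the broken-record metric is incremented.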
private void writeBrokenDataToTableStage(SinkRecord record) {
SnowflakeRecordContent key = (SnowflakeRecordContent) record.key();
SnowflakeRecordContent value = (SnowflakeRecordContent) record.value();
if (key != null) {
String fileName = FileNameUtils.brokenRecordFileName(prefix, record.kafkaOffset(), true);
conn.putToTableStage(tableName, fileName, snowflakeContentToByteArray(key));
pipeStatus.updateBrokenRecordMetrics(1L);
}
if (value != null) {
String fileName = FileNameUtils.brokenRecordFileName(prefix, record.kafkaOffset(), false);
conn.putToTableStage(tableName, fileName, snowflakeContentToByteArray(value));
pipeStatus.updateBrokenRecordMetrics(1L);
}
}
private byte[] snowflakeContentToByteArray(SnowflakeRecordContent content) {
if (content == null) {
return null;
}
if (content.isBroken()) {
return content.getBrokenData();
}
return Arrays.asList(content.getData()).toString().getBytes();
}
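// Called from preCommit() via the outer getOffset(TopicPartition): submits any flushed-but-not-
// yet-ingested files to Snowpipe and returns committedOffset (last offset handed to Snowpipe
// plus one), which is the offset Kafka may safely commit.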
private long getOffset() {
if (fileNames.isEmpty()) {
return committedOffset.get();
}
List<String> fileNamesCopy = new ArrayList<>();
List<String> fileNamesForMetrics = new ArrayList<>();
fileListLock.lock();
try {
fileNamesCopy.addAll(fileNames);
fileNamesForMetrics.addAll(fileNames);
fileNames = new LinkedList<>();
} finally {
fileListLock.unlock();
}
LOGGER.info("pipe {}, ingest files: {}", pipeName, fileNamesCopy);
ingestionService.ingestFiles(fileNamesCopy);
// committedOffset should be updated only when ingestFiles has succeeded.
committedOffset.set(flushedOffset.get());
// update telemetry data
long currentTime = System.currentTimeMillis();
pipeStatus.setCommittedOffset(committedOffset.get() - 1);
pipeStatus.addAndGetFileCountOnIngestion(fileNamesForMetrics.size());
fileNamesForMetrics.forEach(
name ->
pipeStatus.updateCommitLag(currentTime - FileNameUtils.fileNameToTimeIngested(name)));
return committedOffset.get();
}
private void flush(final SnowpipeBuffer buff) {
if (buff == null || buff.isEmpty()) {
return;
}
this.previousFlushTimeStamp = System.currentTimeMillis();
// If we fail to submit/put, throw a runtime exception that kills the connector.
// SnowflakeThreadPoolUtils.flusherThreadPool.submit(
String fileName = FileNameUtils.fileName(prefix, buff.getFirstOffset(), buff.getLastOffset());
String content = buff.getData();
conn.putWithCache(stageName, fileName, content);
// compute metrics which will be exported to JMX for now.
// TODO: Send it to Telemetry API too
computeBufferMetrics(buff);
// This is safe and atomic
flushedOffset.updateAndGet((value) -> Math.max(buff.getLastOffset() + 1, value));
pipeStatus.setFlushedOffset(flushedOffset.get() - 1);
pipeStatus.addAndGetFileCountOnStage(1L); // plus one
pipeStatus.resetMemoryUsage();
fileListLock.lock();
try {
fileNames.add(fileName);
cleanerFileNames.add(fileName);
} finally {
fileListLock.unlock();
}
LOGGER.info("pipe {}, flush pipe: {}", pipeName, fileName);
}
private void checkStatus() {
// Swap cleanerFileNames into a temporary list; after this checkStatus() call, cleanerFileNames
// will contain the subset of files that are still pending (neither loaded nor failed).
List<String> tmpFileNames;
fileListLock.lock();
try {
tmpFileNames = cleanerFileNames;
cleanerFileNames = new LinkedList<>();
} finally {
fileListLock.unlock();
}
long currentTime = System.currentTimeMillis();
List<String> loadedFiles = new LinkedList<>();
List<String> failedFiles = new LinkedList<>();
// ingest report
// This will update the loadedFiles (successfully loaded) &
// failedFiles: PARTIAL + FAILED
// In any case tmpFileNames will be updated.
// If we get all files in ingestReport, tmpFileNames will be empty
filterResultFromSnowpipeScan(
ingestionService.readIngestReport(tmpFileNames), tmpFileNames, loadedFiles, failedFiles);
// old files
List<String> oldFiles = new LinkedList<>();
// iterate over a copy since failed files get removed from it
// Files that were not found in the ingest report and are older than one hour are treated as
// failed; files older than ten minutes that are not yet purged/reported are added to oldFiles
// for a load-history check below.
new LinkedList<>(tmpFileNames)
.forEach(
name -> {
long time = FileNameUtils.fileNameToTimeIngested(name);
if (time < currentTime - ONE_HOUR) {
failedFiles.add(name);
tmpFileNames.remove(name);
} else if (time < currentTime - TEN_MINUTES) {
oldFiles.add(name);
}
});
// load history
// Use the loadHistoryScan API to scan the last hour of data and filter the files remaining in
// the list above.
// This is the last filtering we do; after this we start purging loadedFiles and moving
// failedFiles to the table stage.
if (!oldFiles.isEmpty()) {
filterResultFromSnowpipeScan(
ingestionService.readOneHourHistory(tmpFileNames, currentTime - ONE_HOUR),
tmpFileNames,
loadedFiles,
failedFiles);
}
purge(loadedFiles);
moveToTableStage(failedFiles);
fileListLock.lock();
try {
// Add back all those files which were neither found in ingestReport nor in loadHistoryScan
cleanerFileNames.addAll(tmpFileNames);
} finally {
fileListLock.unlock();
}
// update purged offset in telemetry
loadedFiles.forEach(
name ->
pipeStatus.setPurgedOffsetAtomically(
value -> Math.max(FileNameUtils.fileNameToEndOffset(name), value)));
// update file count in telemetry
int fileCountRemovedFromStage = loadedFiles.size() + failedFiles.size();
pipeStatus.addAndGetFileCountOnStage(-fileCountRemovedFromStage);
pipeStatus.addAndGetFileCountOnIngestion(-fileCountRemovedFromStage);
pipeStatus.updateFailedIngestionMetrics(failedFiles.size());
pipeStatus.addAndGetFileCountPurged(loadedFiles.size());
// update lag information
loadedFiles.forEach(
name ->
pipeStatus.updateIngestionLag(
currentTime - FileNameUtils.fileNameToTimeIngested(name)));
}
// The fileStatus map holds the mapping of file names to their ingestion status.
// It can be received either from the insertReport API or from loadHistoryScan.
private void filterResultFromSnowpipeScan(
Map<String, InternalUtils.IngestedFileStatus> fileStatus,
List<String> allFiles,
List<String> loadedFiles,
List<String> failedFiles) {
fileStatus.forEach(
(name, status) -> {
switch (status) {
case LOADED:
loadedFiles.add(name);
allFiles.remove(name);
break;
case FAILED:
case PARTIALLY_LOADED:
failedFiles.add(name);
allFiles.remove(name);
break;
default:
// otherwise, do nothing
}
});
}
private void purge(List<String> files) {
if (!files.isEmpty()) {
LOGGER.debug(
"Purging loaded files for pipe:{}, loadedFileCount:{}, loadedFiles:{}",
pipeName,
files.size(),
Arrays.toString(files.toArray()));
conn.purgeStage(stageName, files);
}
}
private void moveToTableStage(List<String> failedFiles) {
if (!failedFiles.isEmpty()) {
LOGGER.debug(
"Moving failed files for pipe:{} to tableStage failedFileCount:{}, failedFiles:{}",
pipeName,
failedFiles.size(),
Arrays.toString(failedFiles.toArray()));
conn.moveToTableStage(tableName, stageName, failedFiles);
}
}
private void recover(SnowflakeTelemetryPipeCreation pipeCreation) {
if (conn.pipeExist(pipeName)) {
if (!conn.isPipeCompatible(tableName, stageName, pipeName)) {
throw SnowflakeErrors.ERROR_5005.getException(
"pipe name: " + pipeName, conn.getTelemetryClient());
}
LOGGER.info("pipe {}, recovered from existing pipe", pipeName);
pipeCreation.setReusePipe(true);
} else {
conn.createPipe(tableName, stageName, pipeName);
}
}
private void close() {
try {
stopCleaner();
} catch (Exception e) {
LOGGER.warn("Failed to terminate Cleaner or Flusher");
}
ingestionService.close();
telemetryService.reportKafkaPartitionUsage(pipeStatus, true);
LOGGER.info("pipe {}: service closed", pipeName);
}
/**
* SinkConnector and SinkTasks start at the same time; however, SinkTasks need to create the table
* and wait for SinkConnector to create the stage. This method checks table and stage existence at
* most 120 times (10 min) and then throws an exception if the table or stage doesn't exist.
*/
private void createTableAndStage(SnowflakeTelemetryPipeCreation pipeCreation) {
// create table if not exists
if (conn.tableExist(tableName)) {
if (conn.isTableCompatible(tableName)) {
LOGGER.info("Using existing table {}.", tableName);
pipeCreation.setReuseTable(true);
} else {
throw SnowflakeErrors.ERROR_5003.getException(
"table name: " + tableName, telemetryService);
}
} else {
LOGGER.info("Creating new table {}.", tableName);
conn.createTable(tableName);
}
if (conn.stageExist(stageName)) {
if (conn.isStageCompatible(stageName)) {
LOGGER.info("Using existing stage {}.", stageName);
pipeCreation.setReuseStage(true);
} else {
throw SnowflakeErrors.ERROR_5004.getException(
"stage name: " + stageName, telemetryService);
}
} else {
LOGGER.info("Creating new stage {}.", stageName);
conn.createStage(stageName);
}
}
private boolean isBufferEmpty() {
return this.buffer.isEmpty();
}
/**
* Called when we flush the buffer to the internal stage via the PUT API.
*
* @param buffer the buffer that was pushed to the stage
*/
private void computeBufferMetrics(final SnowpipeBuffer buffer) {
if (enableCustomJMXMonitoring) {
partitionBufferSizeBytesHistogram.update(buffer.getBufferSizeBytes());
partitionBufferCountHistogram.update(buffer.getNumOfRecords());
}
}
/** Equivalent to unregistering all mbeans with the prefix JMX_METRIC_PREFIX */
private void unregisterPipeJMXMetrics() {
if (enableCustomJMXMonitoring) {
metricsJmxReporter.removeMetricsFromRegistry(this.pipeName);
}
}
/**
* Get Metric registry instance of this pipe
*
* @return Metric Registry (Non Null)
*/
public MetricRegistry getMetricRegistry() {
return this.metricRegistry;
}
/**
* Implementation of the buffer for the Snowpipe-based version of the Kafka Connector (KC).
*
* <p>Please note the {@link #insert(SinkRecord)} API is called from the {@link
* com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} API, and it is possible for
* buffered data to span multiple put() calls.
*
* <p>Check the usage of {@link #getData()} to understand when we empty this buffer and when we
* generate files in the internal stage for Snowpipe to ingest later using Snowpipe's REST APIs.
*/
private class SnowpipeBuffer extends PartitionBuffer<String> {
private final StringBuilder stringBuilder;
private SnowpipeBuffer() {
super();
stringBuilder = new StringBuilder();
}
@Override
public void insert(SinkRecord record) {
String data = recordService.getProcessedRecordForSnowpipe(record);
if (getBufferSizeBytes() == 0L) {
setFirstOffset(record.kafkaOffset());
}
stringBuilder.append(data);
setNumOfRecords(getNumOfRecords() + 1);
setBufferSizeBytes(getBufferSizeBytes() + data.length() * 2L); // 1 char = 2 bytes
setLastOffset(record.kafkaOffset());
pipeStatus.addAndGetMemoryUsage(data.length() * 2L);
}
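// Note: besides returning the concatenated records, getData() updates the pipe's telemetry
// totals, so it is expected to be called exactly once per flushed buffer.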
public String getData() {
String result = stringBuilder.toString();
LOGGER.debug(
"flush buffer: {} records, {} bytes, offset {} - {}",
getNumOfRecords(),
getBufferSizeBytes(),
getFirstOffset(),
getLastOffset());
pipeStatus.addAndGetTotalSizeOfData(getBufferSizeBytes());
pipeStatus.addAndGetTotalNumberOfRecord(getNumOfRecords());
return result;
}
@Override
public List<SinkRecord> getSinkRecords() {
throw new UnsupportedOperationException(
"SnowflakeSinkServiceV1 doesnt support getSinkRecords method");
}
}
}
/**
* Only used for testing. Given a pipe name, find out whether the buffer for this pipe is empty.
*
* @param pipeName name of the pipe to look up
* @return true if the buffer is empty; false if it still holds data or the pipe is not registered
*/
protected boolean isPartitionBufferEmpty(final String pipeName) {
if (pipes.containsKey(pipeName)) {
return pipes.get(pipeName).isBufferEmpty();
}
return false;
}
}