
com.snowflake.kafka.connector.internal.SnowflakeSinkServiceV1

package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_RECORD_COUNT;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_SIZE_BYTES;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.BUFFER_SUB_DOMAIN;
import static org.apache.kafka.common.record.TimestampType.NO_TIMESTAMP_TYPE;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryPipeCreation;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryPipeStatus;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.records.RecordService;
import com.snowflake.kafka.connector.records.SnowflakeJsonSchema;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import com.snowflake.kafka.connector.records.SnowflakeRecordContent;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * This is per-task configuration. A task can be assigned multiple partitions. The major methods
 * are startTask, insert, getOffset and close.
 *
 * <p>startTask: called when partitions are assigned. Responsible for generating the POJOs.
 *
 * <p>insert and getOffset are called when the {@link
 * com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} and {@link
 * com.snowflake.kafka.connector.SnowflakeSinkTask#preCommit(Map)} APIs are called.
 */
class SnowflakeSinkServiceV1 implements SnowflakeSinkService {
  private final KCLogger LOGGER = new KCLogger(SnowflakeSinkServiceV1.class.getName());

  private static final long ONE_HOUR = 60 * 60 * 1000L;
  private static final long TEN_MINUTES = 10 * 60 * 1000L;
  protected static final long CLEAN_TIME = 60 * 1000L; // one minute

  // Set in config (time based flush) in seconds
  private long flushTime;
  // Set in config (buffer size based flush) in bytes
  private long fileSize;
  // Set in config (threshold before we send the buffer to the internal stage), corresponds to the
  // number of records in Kafka
  private long recordNum;
  private final SnowflakeConnectionService conn;
  private final Map<String, ServiceContext> pipes;
  private final RecordService recordService;
  private boolean isStopped;
  private final SnowflakeTelemetryService telemetryService;
  private Map<String, String> topic2TableMap;

  // Behavior to be set at connector start. (For tombstone records)
  private SnowflakeSinkConnectorConfig.BehaviorOnNullValues behaviorOnNullValues;

  // default is true unless the configuration provided is false;
  // if this is true, we will enable MBeans for the required classes and emit JMX metrics for
  // monitoring
  private boolean enableCustomJMXMonitoring = SnowflakeSinkConnectorConfig.JMX_OPT_DEFAULT;

  SnowflakeSinkServiceV1(SnowflakeConnectionService conn) {
    if (conn == null || conn.isClosed()) {
      throw SnowflakeErrors.ERROR_5010.getException();
    }

    this.fileSize = SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT;
    this.recordNum = SnowflakeSinkConnectorConfig.BUFFER_COUNT_RECORDS_DEFAULT;
    this.flushTime = SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_DEFAULT;
    this.pipes = new HashMap<>();
    this.conn = conn;
    isStopped = false;
    this.telemetryService = conn.getTelemetryClient();
    this.recordService = new RecordService(this.telemetryService);
    this.topic2TableMap = new HashMap<>();

    // Setting the default value in the constructor,
    // meaning it will not ignore null values (tombstone records won't be ignored/filtered)
    this.behaviorOnNullValues = SnowflakeSinkConnectorConfig.BehaviorOnNullValues.DEFAULT;
  }
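  /*
   * Illustrative call sequence only (a sketch of how SnowflakeSinkTask is expected to drive this
   * service; assumes "conn" is an already-built SnowflakeConnectionService and "records" is the
   * Collection<SinkRecord> handed to put()):
   *
   *   SnowflakeSinkService service = new SnowflakeSinkServiceV1(conn);
   *   service.startPartition("MY_TABLE", new TopicPartition("my_topic", 0));   // on open()
   *   service.insert(records);                                                 // on put()
   *   long safeOffset = service.getOffset(new TopicPartition("my_topic", 0));  // on preCommit()
   *   service.closeAll();                                                      // on stop()
   */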
  /**
   * Creates a new ingestion task from an existing table and stage; tries to reuse an existing pipe
   * and recover the previous task, otherwise creates a new pipe.
   *
   * @param tableName destination table name in Snowflake
   * @param topicPartition TopicPartition passed from Kafka
   */
  @Override
  public void startPartition(final String tableName, final TopicPartition topicPartition) {
    String stageName = Utils.stageName(conn.getConnectorName(), tableName);
    String nameIndex = getNameIndex(topicPartition.topic(), topicPartition.partition());
    if (pipes.containsKey(nameIndex)) {
      LOGGER.warn("task is already registered with {} partition", nameIndex);
    } else {
      String pipeName =
          Utils.pipeName(conn.getConnectorName(), tableName, topicPartition.partition());

      pipes.put(
          nameIndex,
          new ServiceContext(tableName, stageName, pipeName, conn, topicPartition.partition()));
    }
  }

  @Override
  public void startPartitions(
      Collection<TopicPartition> partitions, Map<String, String> topic2Table) {
    partitions.forEach(tp -> this.startPartition(Utils.tableName(tp.topic(), topic2Table), tp));
  }

  @Override
  public void insert(final Collection<SinkRecord> records) {
    // note that records can be empty
    for (SinkRecord record : records) {
      // check whether we need to handle null value records
      if (recordService.shouldSkipNullValue(record, behaviorOnNullValues)) {
        continue;
      }
      // might trigger a record count based flush
      insert(record);
    }
    // check all sink contexts to see if they need to be flushed
    for (ServiceContext pipe : pipes.values()) {
      // time based flushing
      if (pipe.shouldFlush()) {
        pipe.flushBuffer();
      }
    }
  }

  @Override
  public void insert(SinkRecord record) {
    String nameIndex = getNameIndex(record.topic(), record.kafkaPartition());
    // init a new topic partition
    if (!pipes.containsKey(nameIndex)) {
      LOGGER.warn(
          "Topic: {} Partition: {} hasn't been initialized by OPEN function",
          record.topic(),
          record.kafkaPartition());
      startPartition(
          Utils.tableName(record.topic(), this.topic2TableMap),
          new TopicPartition(record.topic(), record.kafkaPartition()));
    }
    pipes.get(nameIndex).insert(record);
  }

  @Override
  public long getOffset(final TopicPartition topicPartition) {
    String name = getNameIndex(topicPartition.topic(), topicPartition.partition());
    if (pipes.containsKey(name)) {
      return pipes.get(name).getOffset();
    } else {
      LOGGER.warn(
          "Topic: {} Partition: {} hasn't been initialized to get offset",
          topicPartition.topic(),
          topicPartition.partition());
      return 0;
    }
  }

  @Override
  public int getPartitionCount() {
    return pipes.size();
  }

  // used for testing only
  @Override
  public void callAllGetOffset() {
    for (ServiceContext pipe : pipes.values()) {
      pipe.getOffset();
    }
  }

  @Override
  public void close(Collection<TopicPartition> partitions) {
    partitions.forEach(
        tp -> {
          String name = getNameIndex(tp.topic(), tp.partition());
          ServiceContext sc = pipes.remove(name);
          if (sc != null) {
            try {
              sc.close();
            } catch (Exception e) {
              LOGGER.error(
                  "Failed to close sink service for Topic: {}, Partition: {}\nMessage: {}",
                  tp.topic(),
                  tp.partition(),
                  e.getMessage());
            } finally {
              sc.unregisterPipeJMXMetrics();
            }
          } else {
            LOGGER.warn(
                "Failed to close sink service for Topic: {}, Partition: {}, "
                    + "sink service hasn't been initialized",
                tp.topic(),
                tp.partition());
          }
        });
  }

  @Override
  public void closeAll() {
    this.isStopped = true; // release all cleaner and flusher threads
    pipes.forEach(
        (name, context) -> {
          context.close();
          context.unregisterPipeJMXMetrics();
        });
    pipes.clear();
  }

  @Override
  public void setIsStoppedToTrue() {
    this.isStopped = true; // release all cleaner and flusher threads
  }

  @Override
  public boolean isClosed() {
    return this.isStopped;
  }

  @Override
  public void setRecordNumber(final long num) {
    if (num < 0) {
      LOGGER.error("number of records in each file is {}, it is negative, reset to 0", num);
      this.recordNum = 0;
    } else {
      this.recordNum = num;
      LOGGER.info("set number of record limitation to {}", num);
    }
  }

  @Override
  public void setFileSize(final long size) {
    if (size < SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_MIN) {
      LOGGER.error(
          "file size is {} bytes, it is smaller than the minimum file size {} bytes,"
              + " reset to the default file size",
          size,
          SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT);
      this.fileSize = SnowflakeSinkConnectorConfig.BUFFER_SIZE_BYTES_DEFAULT;
    } else {
      this.fileSize = size;
      LOGGER.info("set file size limitation to {} bytes", size);
    }
  }

  @Override
  public void setFlushTime(final long time) {
    if (time < SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN) {
      LOGGER.error(
          "flush time is {} seconds, it is smaller than the minimum flush time {} seconds,"
              + " reset to the minimum flush time",
          time,
          SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN);
      this.flushTime = SnowflakeSinkConnectorConfig.BUFFER_FLUSH_TIME_SEC_MIN;
    } else {
      this.flushTime = time;
      LOGGER.info("set flush time to {} seconds", time);
    }
  }

  @Override
  public void setTopic2TableMap(Map<String, String> topic2TableMap) {
    this.topic2TableMap = topic2TableMap;
  }

  @Override
  public void setMetadataConfig(SnowflakeMetadataConfig configMap) {
    this.recordService.setMetadataConfig(configMap);
  }

  @Override
  public long getRecordNumber() {
    return this.recordNum;
  }

  @Override
  public long getFlushTime() {
    return this.flushTime;
  }

  @Override
  public long getFileSize() {
    return this.fileSize;
  }

  @Override
  public void setBehaviorOnNullValuesConfig(
      SnowflakeSinkConnectorConfig.BehaviorOnNullValues behavior) {
    this.behaviorOnNullValues = behavior;
  }

  @Override
  public void setCustomJMXMetrics(boolean enableJMX) {
    this.enableCustomJMXMonitoring = enableJMX;
  }

  @Override
  public SnowflakeSinkConnectorConfig.BehaviorOnNullValues getBehaviorOnNullValuesConfig() {
    return this.behaviorOnNullValues;
  }

  /**
   * Loops through all pipes in memory and finds the metric registry instance for the given pipe.
   * The pipes map is not keyed by pipeName, hence the need to loop over it.
   *
   * @param pipeName pipe whose associated MetricRegistry to fetch
   * @return Optional MetricRegistry (empty if the pipe was not found in the pipes map)
   */
  @Override
  public Optional<MetricRegistry> getMetricRegistry(final String pipeName) {
    for (Map.Entry<String, ServiceContext> entry : this.pipes.entrySet()) {
      if (entry.getValue().pipeName.equalsIgnoreCase(pipeName)) {
        return Optional.of(entry.getValue().getMetricRegistry());
      }
    }
    return Optional.empty();
  }

  @VisibleForTesting
  protected static String getNameIndex(String topic, int partition) {
    return topic + "_" + partition;
  }
  private class ServiceContext {
    private final String tableName;
    private final String stageName;
    private final String pipeName;
    private final SnowflakeConnectionService conn;
    private final SnowflakeIngestionService ingestionService;
    private List<String> fileNames;

    // Includes a list of files:
    // 1. which are added after a flush into the internal stage is successful
    // 2. which are found when an app restarts and we list the internal stage to find out what
    //    needs to be done about leaked files
    private List<String> cleanerFileNames;
    private SnowpipeBuffer buffer;
    private final String prefix;
    private final AtomicLong committedOffset; // loaded offset + 1
    private final AtomicLong flushedOffset; // flushed offset (file on stage)
    private final AtomicLong processedOffset; // processed offset
    private long previousFlushTimeStamp;

    // threads
    private final ExecutorService cleanerExecutor;
    private final ExecutorService reprocessCleanerExecutor;
    private final Lock bufferLock;
    private final Lock fileListLock;

    // telemetry
    private final SnowflakeTelemetryPipeStatus pipeStatus; // non null
    private final MetricRegistry metricRegistry;

    // Wrapper on the metric registry instance which will hold all registered metrics for this pipe
    private final MetricsJmxReporter metricsJmxReporter;

    // buffer metrics, updated every time a buffer is flushed to the internal stage
    private Histogram partitionBufferSizeBytesHistogram; // in bytes
    private Histogram partitionBufferCountHistogram;

    // make the initialization lazy
    private boolean hasInitialized = false;
    private boolean forceCleanerFileReset = false;

    private ServiceContext(
        String tableName,
        String stageName,
        String pipeName,
        SnowflakeConnectionService conn,
        int partition) {
      this.pipeName = pipeName;
      this.tableName = tableName;
      this.stageName = stageName;
      this.conn = conn;
      this.fileNames = new LinkedList<>();
      this.cleanerFileNames = new LinkedList<>();
      this.buffer = new SnowpipeBuffer();
      this.ingestionService = conn.buildIngestService(stageName, pipeName);
      this.prefix = FileNameUtils.filePrefix(conn.getConnectorName(), tableName, partition);
      this.processedOffset = new AtomicLong(-1);
      this.flushedOffset = new AtomicLong(-1);
      this.committedOffset = new AtomicLong(0);
      this.previousFlushTimeStamp = System.currentTimeMillis();

      this.bufferLock = new ReentrantLock();
      this.fileListLock = new ReentrantLock();

      this.metricRegistry = new MetricRegistry();
      this.metricsJmxReporter =
          new MetricsJmxReporter(this.metricRegistry, conn.getConnectorName());

      this.pipeStatus =
          new SnowflakeTelemetryPipeStatus(
              tableName, stageName, pipeName, enableCustomJMXMonitoring, this.metricsJmxReporter);

      this.cleanerExecutor = Executors.newSingleThreadExecutor();
      this.reprocessCleanerExecutor = Executors.newSingleThreadExecutor();

      if (enableCustomJMXMonitoring) {
        partitionBufferCountHistogram =
            this.metricRegistry.histogram(
                MetricsUtil.constructMetricName(pipeName, BUFFER_SUB_DOMAIN, BUFFER_RECORD_COUNT));
        partitionBufferSizeBytesHistogram =
            this.metricRegistry.histogram(
                MetricsUtil.constructMetricName(pipeName, BUFFER_SUB_DOMAIN, BUFFER_SIZE_BYTES));
        LOGGER.info(
            "Registered {} metrics for pipeName:{}", metricRegistry.getMetrics().size(), pipeName);
      }

      LOGGER.info("pipe: {} - service started", pipeName);
    }

    private void init(long recordOffset) {
      LOGGER.info("init pipe: {}", pipeName);
      SnowflakeTelemetryPipeCreation pipeCreation =
          new SnowflakeTelemetryPipeCreation(tableName, stageName, pipeName);

      // wait for sinkConnector to start
      createTableAndStage(pipeCreation);
      // recover will only check the pipe status and create the pipe if it does not exist
      recover(pipeCreation);

      try {
        startCleaner(recordOffset, pipeCreation);
        telemetryService.reportKafkaPartitionStart(pipeCreation);
      } catch (Exception e) {
        LOGGER.warn("Cleaner and Flusher threads shut down before initialization");
      }
    }

    private boolean resetCleanerFiles() {
      try {
        LOGGER.warn("Resetting cleaner files {}", pipeName);
        pipeStatus.incrementAndGetCleanerRestartCount();
        // list the stage again and try to clean the files leaked on the stage
        // this can throw unchecked, so it needs to be wrapped in a try/catch
        // if it fails again, do not reset forceCleanerFileReset
        List<String> tmpCleanerFileNames = conn.listStage(stageName, prefix);
        fileListLock.lock();
        try {
          cleanerFileNames.addAll(tmpCleanerFileNames);
          cleanerFileNames = cleanerFileNames.stream().distinct().collect(Collectors.toList());
        } finally {
          fileListLock.unlock();
        }
        forceCleanerFileReset = false;
        LOGGER.warn("Resetting cleaner files {} done", pipeName);
      } catch (Throwable t) {
        LOGGER.warn("Cleaner file reset encountered an error:\n{}", t.getMessage());
      }

      return forceCleanerFileReset;
    }

    // If there are files already on the stage, we need to decide whether we will reprocess the
    // offsets or purge them.
    private void startCleaner(long recordOffset, SnowflakeTelemetryPipeCreation pipeCreation) {
      // When the cleaner starts, scan the stage for all files of this pipe.
      // If we know that we are going to reprocess the file, then safely delete the file.
      List<String> currentFilesOnStage = conn.listStage(stageName, prefix);
      List<String> reprocessFiles = new ArrayList<>();

      filterFileReprocess(currentFilesOnStage, reprocessFiles, recordOffset);

      // Telemetry
      pipeCreation.setFileCountRestart(currentFilesOnStage.size());
      pipeCreation.setFileCountReprocessPurge(reprocessFiles.size());
      // Files left on the stage must be on ingestion, otherwise the offset won't be committed and
      // the file would have been removed by the reprocess filter.
      pipeStatus.addAndGetFileCountOnIngestion(currentFilesOnStage.size());
      pipeStatus.addAndGetFileCountOnStage(currentFilesOnStage.size());

      fileListLock.lock();
      try {
        cleanerFileNames.addAll(currentFilesOnStage);
      } finally {
        fileListLock.unlock();
      }

      cleanerExecutor.submit(
          () -> {
            LOGGER.info("pipe {}: cleaner started", pipeName);
            while (!isStopped) {
              try {
                telemetryService.reportKafkaPartitionUsage(pipeStatus, false);
                Thread.sleep(CLEAN_TIME);

                if (forceCleanerFileReset && resetCleanerFiles()) {
                  continue;
                }

                checkStatus();
              } catch (InterruptedException e) {
                LOGGER.info("Cleaner terminated by an interrupt:\n{}", e.getMessage());
                break;
              } catch (Exception e) {
                LOGGER.warn(
                    "Cleaner encountered an exception {}:\n{}\n{}",
                    e.getClass(),
                    e.getMessage(),
                    e.getStackTrace());
                telemetryService.reportKafkaConnectFatalError(e.getMessage());
                forceCleanerFileReset = true;
              }
            }
          });

      if (reprocessFiles.size() > 0) {
        // After we start the cleaner thread, delay a while and then start deleting files.
        reprocessCleanerExecutor.submit(
            () -> {
              try {
                Thread.sleep(CLEAN_TIME);
                LOGGER.info(
                    "Purging files already present on the stage before start. ReprocessFileSize:{}",
                    reprocessFiles.size());
                purge(reprocessFiles);
              } catch (Exception e) {
                LOGGER.error(
                    "Reprocess cleaner encountered an exception {}:\n{}\n{}",
                    e.getClass(),
                    e.getMessage(),
                    e.getStackTrace());
              }
            });
      }
    }
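    /*
     * Cleaner cadence (as implemented in startCleaner above): the cleaner thread wakes up every
     * CLEAN_TIME (one minute), reports partition telemetry, and then reconciles cleanerFileNames
     * via checkStatus(). If an iteration fails with an unexpected exception, forceCleanerFileReset
     * is set so that the next iteration rebuilds the file list from a fresh listStage() call in
     * resetCleanerFiles().
     */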

    /**
     * Does in-place manipulation of the passed currentFilesOnStage. The caller of this function
     * passes in the list of file names on the stage (ls @stageName).
     *
     * <p>In return it expects the list of files (reprocessFiles), which is a subset of
     * currentFilesOnStage.
     *
     * <p>How do we find the list of reprocessFiles?
     *
     * <p>1. Find the start offset of each file currently on the stage.
     *
     * <p>2. If the current offset passed by the connector is less than or equal to the start
     * offset of a found file, we will reprocess that file and at the same time remove it from
     * currentFilesOnStage. (The idea being: if the current offset is still found on the stage, it
     * was not purged, so we will reprocess it.)
     *
     * @param currentFilesOnStage LIST.OF((ls @stageName))
     * @param reprocessFiles empty list that this method fills
     * @param recordOffset current offset
     */
    private void filterFileReprocess(
        List<String> currentFilesOnStage, List<String> reprocessFiles, long recordOffset) {
      // iterate over a copy since reprocess files get removed from the original list
      new LinkedList<>(currentFilesOnStage)
          .forEach(
              name -> {
                long fileStartOffset = FileNameUtils.fileNameToStartOffset(name);
                // If the start offset of this file is greater than or equal to the offset of the
                // record that is sent to the connector, all content of this file will be
                // reprocessed, so this file can be deleted.
                if (recordOffset <= fileStartOffset) {
                  reprocessFiles.add(name);
                  currentFilesOnStage.remove(name);
                }
              });
    }
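    /*
     * Worked example (hypothetical file names; real names come from FileNameUtils): suppose the
     * stage holds files whose start offsets are 100 and 250, and Kafka hands the connector
     * recordOffset 120 after a restart. The file starting at 250 satisfies
     * recordOffset <= fileStartOffset, so it is moved into reprocessFiles and later purged, since
     * its records will be re-delivered and re-written; the file starting at 100 stays in
     * currentFilesOnStage and is left for the cleaner to track.
     */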
    private void stopCleaner() {
      cleanerExecutor.shutdownNow();
      reprocessCleanerExecutor.shutdownNow();
      LOGGER.info("pipe {}: cleaner terminated", pipeName);
    }

    private void insert(final SinkRecord record) {
      // init pipe
      if (!hasInitialized) {
        // This will only be called once, at the beginning, when an offset arrives for the first
        // time after the connector starts or rebalances
        init(record.kafkaOffset());
        metricsJmxReporter.start();
        this.hasInitialized = true;
      }

      // only get the offset token once when the service context is initialized
      // ignore already ingested files
      if (record.kafkaOffset() > processedOffset.get()) {
        SinkRecord snowflakeRecord = record;
        if (shouldConvertContent(snowflakeRecord.value())) {
          snowflakeRecord = handleNativeRecord(snowflakeRecord, false);
        }
        if (shouldConvertContent(snowflakeRecord.key())) {
          snowflakeRecord = handleNativeRecord(snowflakeRecord, true);
        }

        // broken record
        if (isRecordBroken(snowflakeRecord)) {
          writeBrokenDataToTableStage(snowflakeRecord);
          // don't move the committed offset in this case
          // only move it in the normal cases
        } else {
          // lag telemetry; note that the sink record timestamp might be null
          if (snowflakeRecord.timestamp() != null
              && snowflakeRecord.timestampType() != NO_TIMESTAMP_TYPE) {
            pipeStatus.updateKafkaLag(System.currentTimeMillis() - snowflakeRecord.timestamp());
          }

          SnowpipeBuffer tmpBuff = null;
          bufferLock.lock();
          try {
            processedOffset.set(snowflakeRecord.kafkaOffset());
            pipeStatus.setProcessedOffset(snowflakeRecord.kafkaOffset());
            buffer.insert(snowflakeRecord);
            if (buffer.getBufferSizeBytes() >= getFileSize()
                || (getRecordNumber() != 0 && buffer.getNumOfRecords() >= getRecordNumber())) {
              tmpBuff = buffer;
              this.buffer = new SnowpipeBuffer();
            }
          } finally {
            bufferLock.unlock();
          }

          if (tmpBuff != null) {
            flush(tmpBuff);
          }
        }
      }
    }

    private boolean shouldConvertContent(final Object content) {
      return content != null && !(content instanceof SnowflakeRecordContent);
    }

    private boolean isRecordBroken(final SinkRecord record) {
      return isContentBroken(record.value()) || isContentBroken(record.key());
    }

    private boolean isContentBroken(final Object content) {
      return content != null && ((SnowflakeRecordContent) content).isBroken();
    }

    private SinkRecord handleNativeRecord(SinkRecord record, boolean isKey) {
      SnowflakeRecordContent newSFContent;
      Schema schema = isKey ? record.keySchema() : record.valueSchema();
      Object content = isKey ? record.key() : record.value();
      try {
        newSFContent = new SnowflakeRecordContent(schema, content, false);
      } catch (Exception e) {
        LOGGER.error("Native content parser error:\n{}", e.getMessage());
        try {
          // try to serialize this object and send that as a broken record
          ByteArrayOutputStream out = new ByteArrayOutputStream();
          ObjectOutputStream os = new ObjectOutputStream(out);
          os.writeObject(content);
          newSFContent = new SnowflakeRecordContent(out.toByteArray());
        } catch (Exception serializeError) {
          LOGGER.error(
              "Failed to convert broken native record to byte data:\n{}",
              serializeError.getMessage());
          throw e;
        }
      }

      // create a new sinkRecord
      Schema keySchema = isKey ? new SnowflakeJsonSchema() : record.keySchema();
      Object keyContent = isKey ? newSFContent : record.key();
      Schema valueSchema = isKey ? record.valueSchema() : new SnowflakeJsonSchema();
      Object valueContent = isKey ? record.value() : newSFContent;
      return new SinkRecord(
          record.topic(),
          record.kafkaPartition(),
          keySchema,
          keyContent,
          valueSchema,
          valueContent,
          record.kafkaOffset(),
          record.timestamp(),
          record.timestampType(),
          record.headers());
    }

    private boolean shouldFlush() {
      return (System.currentTimeMillis() - this.previousFlushTimeStamp) >= (getFlushTime() * 1000);
    }

    private void flushBuffer() {
      // Just checking the buffer size, no atomic operation required
      if (buffer.isEmpty()) {
        return;
      }
      SnowpipeBuffer tmpBuff;
      bufferLock.lock();
      try {
        tmpBuff = buffer;
        this.buffer = new SnowpipeBuffer();
      } finally {
        bufferLock.unlock();
      }
      flush(tmpBuff);
    }

    private void writeBrokenDataToTableStage(SinkRecord record) {
      SnowflakeRecordContent key = (SnowflakeRecordContent) record.key();
      SnowflakeRecordContent value = (SnowflakeRecordContent) record.value();
      if (key != null) {
        String fileName = FileNameUtils.brokenRecordFileName(prefix, record.kafkaOffset(), true);
        conn.putToTableStage(tableName, fileName, snowflakeContentToByteArray(key));
        pipeStatus.updateBrokenRecordMetrics(1L);
      }
      if (value != null) {
        String fileName = FileNameUtils.brokenRecordFileName(prefix, record.kafkaOffset(), false);
        conn.putToTableStage(tableName, fileName, snowflakeContentToByteArray(value));
        pipeStatus.updateBrokenRecordMetrics(1L);
      }
    }

    private byte[] snowflakeContentToByteArray(SnowflakeRecordContent content) {
      if (content == null) {
        return null;
      }
      if (content.isBroken()) {
        return content.getBrokenData();
      }
      return Arrays.asList(content.getData()).toString().getBytes();
    }

    private long getOffset() {
      if (fileNames.isEmpty()) {
        return committedOffset.get();
      }

      List<String> fileNamesCopy = new ArrayList<>();
      List<String> fileNamesForMetrics = new ArrayList<>();
      fileListLock.lock();
      try {
        fileNamesCopy.addAll(fileNames);
        fileNamesForMetrics.addAll(fileNames);
        fileNames = new LinkedList<>();
      } finally {
        fileListLock.unlock();
      }

      LOGGER.info("pipe {}, ingest files: {}", pipeName, fileNamesCopy);
      ingestionService.ingestFiles(fileNamesCopy);

      // committedOffset should be updated only when ingestFiles has succeeded
      committedOffset.set(flushedOffset.get());

      // update telemetry data
      long currentTime = System.currentTimeMillis();
      pipeStatus.setCommittedOffset(committedOffset.get() - 1);
      pipeStatus.addAndGetFileCountOnIngestion(fileNamesForMetrics.size());
      fileNamesForMetrics.forEach(
          name ->
              pipeStatus.updateCommitLag(currentTime - FileNameUtils.fileNameToTimeIngested(name)));

      return committedOffset.get();
    }

    private void flush(final SnowpipeBuffer buff) {
      if (buff == null || buff.isEmpty()) {
        return;
      }
      this.previousFlushTimeStamp = System.currentTimeMillis();

      // If we failed to submit/put, throw a runtime exception that kills the connector.
      // SnowflakeThreadPoolUtils.flusherThreadPool.submit(
      String fileName = FileNameUtils.fileName(prefix, buff.getFirstOffset(), buff.getLastOffset());
      String content = buff.getData();
      conn.putWithCache(stageName, fileName, content);

      // compute metrics which will be exported to JMX for now
      // TODO: send them to the Telemetry API too
      computeBufferMetrics(buff);

      // This is safe and atomic
      flushedOffset.updateAndGet((value) -> Math.max(buff.getLastOffset() + 1, value));
      pipeStatus.setFlushedOffset(flushedOffset.get() - 1);
      pipeStatus.addAndGetFileCountOnStage(1L); // plus one
      pipeStatus.resetMemoryUsage();

      fileListLock.lock();
      try {
        fileNames.add(fileName);
        cleanerFileNames.add(fileName);
      } finally {
        fileListLock.unlock();
      }

      LOGGER.info("pipe {}, flush pipe: {}", pipeName, fileName);
    }
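    /*
     * Offset bookkeeping, as wired together above: insert() advances processedOffset for every
     * record accepted into the buffer; flush() advances flushedOffset to lastOffset + 1 once the
     * file has been PUT to the internal stage; getOffset() advances committedOffset to
     * flushedOffset only after ingestFiles() has been invoked for those files, and that is the
     * value the connector's preCommit() path reports back to Kafka. A record whose file never
     * reaches ingestFiles() is therefore never committed and will be re-delivered after a restart.
     */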
    private void checkStatus() {
      // We are using a temporary list which will reset cleanerFileNames.
      // After this checkStatus() call, we will have an updated cleanerFileNames which is a subset
      // of the existing cleanerFileNames, this time containing only the files that were not yet
      // found in the ingest report or the load history.
      List<String> tmpFileNames;

      fileListLock.lock();
      try {
        tmpFileNames = cleanerFileNames;
        cleanerFileNames = new LinkedList<>();
      } finally {
        fileListLock.unlock();
      }

      long currentTime = System.currentTimeMillis();
      List<String> loadedFiles = new LinkedList<>();
      List<String> failedFiles = new LinkedList<>();

      // ingest report
      // This will update loadedFiles (successfully loaded) and
      // failedFiles (PARTIAL + FAILED).
      // In any case tmpFileNames will be updated.
      // If we get all files in the ingest report, tmpFileNames will be empty.
      filterResultFromSnowpipeScan(
          ingestionService.readIngestReport(tmpFileNames), tmpFileNames, loadedFiles, failedFiles);

      // old files
      List<String> oldFiles = new LinkedList<>();

      // iterate over a copy since failed files get removed from it
      // Iterate over those files which were not found in the ingest report call and have been
      // sitting on the stage for more than an hour; mark them as failed.
      // Also add to oldFiles those files which have not been purged/found in the ingest report for
      // the last 10 minutes.
      new LinkedList<>(tmpFileNames)
          .forEach(
              name -> {
                long time = FileNameUtils.fileNameToTimeIngested(name);
                if (time < currentTime - ONE_HOUR) {
                  failedFiles.add(name);
                  tmpFileNames.remove(name);
                } else if (time < currentTime - TEN_MINUTES) {
                  oldFiles.add(name);
                }
              });

      // load history
      // Use the loadHistoryScan API to scan the last hour of data and filter files from the list
      // filtered above.
      // This is the last filtering we do; after this, we start purging loadedFiles and moving
      // failedFiles to the table stage.
      if (!oldFiles.isEmpty()) {
        filterResultFromSnowpipeScan(
            ingestionService.readOneHourHistory(tmpFileNames, currentTime - ONE_HOUR),
            tmpFileNames,
            loadedFiles,
            failedFiles);
      }

      purge(loadedFiles);
      moveToTableStage(failedFiles);

      fileListLock.lock();
      try {
        // Add back all those files which were neither found in the ingest report nor in the load
        // history scan
        cleanerFileNames.addAll(tmpFileNames);
      } finally {
        fileListLock.unlock();
      }

      // update the purged offset in telemetry
      loadedFiles.forEach(
          name ->
              pipeStatus.setPurgedOffsetAtomically(
                  value -> Math.max(FileNameUtils.fileNameToEndOffset(name), value)));
      // update the file counts in telemetry
      int fileCountRemovedFromStage = loadedFiles.size() + failedFiles.size();
      pipeStatus.addAndGetFileCountOnStage(-fileCountRemovedFromStage);
      pipeStatus.addAndGetFileCountOnIngestion(-fileCountRemovedFromStage);
      pipeStatus.updateFailedIngestionMetrics(failedFiles.size());
      pipeStatus.addAndGetFileCountPurged(loadedFiles.size());

      // update lag information
      loadedFiles.forEach(
          name ->
              pipeStatus.updateIngestionLag(
                  currentTime - FileNameUtils.fileNameToTimeIngested(name)));
    }
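    /*
     * File lifecycle in checkStatus(), by file age (derived from ONE_HOUR / TEN_MINUTES above): a
     * file younger than ten minutes that is missing from the ingest report simply stays in
     * cleanerFileNames for the next pass; a file between ten minutes and one hour old is also
     * checked against the one-hour load history; a file older than one hour that still has no
     * status is treated as failed and moved to the table stage for manual inspection.
     */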
    // The fileStatus map contains a mapping of file names to their ingestion status.
    // It can be received either from the insertReport API or from loadHistoryScan.
    private void filterResultFromSnowpipeScan(
        Map<String, InternalUtils.IngestedFileStatus> fileStatus,
        List<String> allFiles,
        List<String> loadedFiles,
        List<String> failedFiles) {
      fileStatus.forEach(
          (name, status) -> {
            switch (status) {
              case LOADED:
                loadedFiles.add(name);
                allFiles.remove(name);
                break;
              case FAILED:
              case PARTIALLY_LOADED:
                failedFiles.add(name);
                allFiles.remove(name);
                break;
              default:
                // otherwise, do nothing
            }
          });
    }

    private void purge(List<String> files) {
      if (!files.isEmpty()) {
        LOGGER.debug(
            "Purging loaded files for pipe:{}, loadedFileCount:{}, loadedFiles:{}",
            pipeName,
            files.size(),
            Arrays.toString(files.toArray()));
        conn.purgeStage(stageName, files);
      }
    }

    private void moveToTableStage(List<String> failedFiles) {
      if (!failedFiles.isEmpty()) {
        LOGGER.debug(
            "Moving failed files for pipe:{} to tableStage failedFileCount:{}, failedFiles:{}",
            pipeName,
            failedFiles.size(),
            Arrays.toString(failedFiles.toArray()));
        conn.moveToTableStage(tableName, stageName, failedFiles);
      }
    }

    private void recover(SnowflakeTelemetryPipeCreation pipeCreation) {
      if (conn.pipeExist(pipeName)) {
        if (!conn.isPipeCompatible(tableName, stageName, pipeName)) {
          throw SnowflakeErrors.ERROR_5005.getException(
              "pipe name: " + pipeName, conn.getTelemetryClient());
        }
        LOGGER.info("pipe {}, recovered from existing pipe", pipeName);
        pipeCreation.setReusePipe(true);
      } else {
        conn.createPipe(tableName, stageName, pipeName);
      }
    }

    private void close() {
      try {
        stopCleaner();
      } catch (Exception e) {
        LOGGER.warn("Failed to terminate Cleaner or Flusher");
      }
      ingestionService.close();
      telemetryService.reportKafkaPartitionUsage(pipeStatus, true);
      LOGGER.info("pipe {}: service closed", pipeName);
    }

    /**
     * SinkConnector and SinkTasks start at the same time; however, SinkTasks need the table and
     * must wait for SinkConnector to create the stage. This method checks table and stage
     * existence at most 120 times (10 min) and then throws an exception if the table or stage
     * doesn't exist.
     */
    private void createTableAndStage(SnowflakeTelemetryPipeCreation pipeCreation) {
      // create the table if it does not exist
      if (conn.tableExist(tableName)) {
        if (conn.isTableCompatible(tableName)) {
          LOGGER.info("Using existing table {}.", tableName);
          pipeCreation.setReuseTable(true);
        } else {
          throw SnowflakeErrors.ERROR_5003.getException(
              "table name: " + tableName, telemetryService);
        }
      } else {
        LOGGER.info("Creating new table {}.", tableName);
        conn.createTable(tableName);
      }

      if (conn.stageExist(stageName)) {
        if (conn.isStageCompatible(stageName)) {
          LOGGER.info("Using existing stage {}.", stageName);
          pipeCreation.setReuseStage(true);
        } else {
          throw SnowflakeErrors.ERROR_5004.getException(
              "stage name: " + stageName, telemetryService);
        }
      } else {
        LOGGER.info("Creating new stage {}.", stageName);
        conn.createStage(stageName);
      }
    }

    private boolean isBufferEmpty() {
      return this.buffer.isEmpty();
    }

    /**
     * Called when we flush the buffer to the internal stage by calling the PUT API.
     *
     * @param buffer the buffer that was pushed to the stage
     */
    private void computeBufferMetrics(final SnowpipeBuffer buffer) {
      if (enableCustomJMXMonitoring) {
        partitionBufferSizeBytesHistogram.update(buffer.getBufferSizeBytes());
        partitionBufferCountHistogram.update(buffer.getNumOfRecords());
      }
    }

    /** Equivalent to unregistering all MBeans with the prefix JMX_METRIC_PREFIX. */
    private void unregisterPipeJMXMetrics() {
      if (enableCustomJMXMonitoring) {
        metricsJmxReporter.removeMetricsFromRegistry(this.pipeName);
      }
    }

    /**
     * Get the metric registry instance of this pipe.
     *
     * @return metric registry (non-null)
     */
    public MetricRegistry getMetricRegistry() {
      return this.metricRegistry;
    }
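    /*
     * Note on the size accounting used by SnowpipeBuffer below: the buffer size is estimated as
     * data.length() * 2 bytes (two bytes per char, an in-memory UTF-16 estimate rather than the
     * exact on-stage file size), and that same estimate feeds pipeStatus memory usage and the JMX
     * buffer histograms via computeBufferMetrics().
     */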

    /**
     * Implementation of the buffer for the Snowpipe based implementation of KC.
     *
     * <p>Please note the {@link #insert(SinkRecord)} API is called from the {@link
     * com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} API, and it is possible for
     * buffered data to accumulate across multiple put calls.
     *
     * <p>Check the usage of {@link #getData()} to understand when we empty this buffer and when we
     * generate files in the internal stage for Snowpipe to ingest later using Snowpipe's REST
     * APIs.
     */
    private class SnowpipeBuffer extends PartitionBuffer<String> {
      private final StringBuilder stringBuilder;

      private SnowpipeBuffer() {
        super();
        stringBuilder = new StringBuilder();
      }

      @Override
      public void insert(SinkRecord record) {
        String data = recordService.getProcessedRecordForSnowpipe(record);
        if (getBufferSizeBytes() == 0L) {
          setFirstOffset(record.kafkaOffset());
        }

        stringBuilder.append(data);
        setNumOfRecords(getNumOfRecords() + 1);
        setBufferSizeBytes(getBufferSizeBytes() + data.length() * 2L); // 1 char = 2 bytes
        setLastOffset(record.kafkaOffset());

        pipeStatus.addAndGetMemoryUsage(data.length() * 2L);
      }

      public String getData() {
        String result = stringBuilder.toString();
        LOGGER.debug(
            "flush buffer: {} records, {} bytes, offset {} - {}",
            getNumOfRecords(),
            getBufferSizeBytes(),
            getFirstOffset(),
            getLastOffset());
        pipeStatus.addAndGetTotalSizeOfData(getBufferSizeBytes());
        pipeStatus.addAndGetTotalNumberOfRecord(getNumOfRecords());
        return result;
      }

      @Override
      public List<SinkRecord> getSinkRecords() {
        throw new UnsupportedOperationException(
            "SnowflakeSinkServiceV1 doesn't support the getSinkRecords method");
      }
    }
  }

  /**
   * Only used for testing. Given a pipe name, find out if the buffer for this pipe has any data
   * inserted.
   *
   * @param pipeName name of the pipe whose buffer to check
   * @return true if the pipe's buffer is empty; false if the pipe is not registered
   */
  protected boolean isPartitionBufferEmpty(final String pipeName) {
    if (pipes.containsKey(pipeName)) {
      return pipes.get(pipeName).isBufferEmpty();
    }
    return false;
  }
}




