
// Source: com.snowflake.kafka.connector.SnowflakeSinkConnector (Maven / Gradle / Ivy)
/*
* Copyright (c) 2019 Snowflake Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.snowflake.kafka.connector;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.config.Config;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.sink.SinkConnector;
/**
 * SnowflakeSinkConnector implements SinkConnector for the Kafka Connect framework.
 *
 * <p>Expected configuration: topic names, partition numbers, Snowflake connection info, and
 * credentials info.
 *
 * <p>Creates Snowflake internal stages and Snowflake tables, and provides configuration to
 * SinkTasks running on Kafka Connect Workers.
 */
public class SnowflakeSinkConnector extends SinkConnector {
// Logger shared by every instance of this class; created without a correlationId for now.
private static KCLogger LOGGER = new KCLogger(SnowflakeSinkConnector.class.getName());

// Connector configuration, provided by the user through the Kafka Connect framework.
// Keys and values are both strings, so the map is typed accordingly (a raw Map would make
// lookups return Object).
private Map<String, String> config;

// Connection service used to interact with the user's Snowflake account and execute queries.
private SnowflakeConnectionService conn;

// Snowflake Telemetry provides methods to report usage statistics.
private SnowflakeTelemetryService telemetryClient;

// Value of System.currentTimeMillis() captured when start() was last invoked.
private long connectorStartTime;

// Kafka Connect starts sink tasks without waiting for setup in
// SnowflakeSinkConnector to finish.
// This causes race conditions for: config validation, tables and stages
// creation, etc.
// setupComplete is the flag used to synchronize on that setup. It is declared volatile
// because it is written by start()/stop() and is meant to be observed by other threads;
// without volatile a reader is not guaranteed to ever see the updated value.
private volatile boolean setupComplete;
/**
 * No-argument constructor, required by the Kafka Connect framework.
 *
 * <p>A freshly constructed connector has not been set up yet, so the setup flag starts out
 * cleared.
 */
public SnowflakeSinkConnector() {
  this.setupComplete = false;
}
/**
 * Starts the connector: copies the supplied configuration, fills in defaults, validates it, and
 * opens a persisted Snowflake connection.
 *
 * <p>start will only be called on a clean connector, i.e. it has either just been instantiated
 * and initialized, or stop() has been invoked.
 *
 * <p>Steps, in order: check the connector version, copy the configuration, apply default
 * values, normalize the connector name, validate the configuration, toggle MDC logging, enable
 * the JVM proxy, build the connection service, and report the start to telemetry.
 *
 * <p>{@code setupComplete} is cleared at the beginning and set only after every step has
 * finished; if any step throws, the flag stays cleared.
 *
 * @param parsedConfig has the configuration settings; it is copied, and all later changes are
 *     applied to the copy
 */
@Override
public void start(final Map<String, String> parsedConfig) {
  LOGGER.info("SnowflakeSinkConnector:starting...");
  Utils.checkConnectorVersion();

  setupComplete = false;
  connectorStartTime = System.currentTimeMillis();

  // Work on a typed private copy. The field and the local refer to the same map, so every
  // adjustment made below is visible through the field as well.
  final Map<String, String> connectorConfig = new HashMap<>(parsedConfig);
  config = connectorConfig;

  SnowflakeSinkConnectorConfig.setDefaultValues(connectorConfig);

  // modify invalid connector name
  Utils.convertAppName(connectorConfig);

  Utils.validateConfig(connectorConfig);

  // enable mdc logging if needed
  final String mdcLoggingSetting =
      connectorConfig.getOrDefault(
          SnowflakeSinkConnectorConfig.ENABLE_MDC_LOGGING_CONFIG,
          SnowflakeSinkConnectorConfig.ENABLE_MDC_LOGGING_DEFAULT);
  KCLogger.toggleGlobalMdcLoggingContext(Boolean.parseBoolean(mdcLoggingSetting));

  // enable proxy
  Utils.enableJVMProxy(connectorConfig);

  // create a persisted connection, and validate snowflake connection
  // config as a side effect
  conn = SnowflakeConnectionServiceFactory.builder().setProperties(connectorConfig).build();

  telemetryClient = conn.getTelemetryClient();
  telemetryClient.reportKafkaConnectStart(connectorStartTime, connectorConfig);

  // Only now is the connector fully set up.
  setupComplete = true;
  LOGGER.info("SnowflakeSinkConnector:started");
}
/**
 * Stops the connector: clears the setup flag, logs the stop, and reports it to telemetry.
 *
 * <p>NOTE(review): earlier documentation described cleaning up Snowflake internal stages and
 * pipes (after making sure there are no pending files to ingest) and checking for leaked
 * stages, staged files, and pipes. This method performs none of that itself; confirm where, if
 * anywhere, that cleanup happens.
 */
@Override
public void stop() {
  setupComplete = false;
  LOGGER.info("SnowflakeSinkConnector:stopped");

  // telemetryClient is assigned only near the end of start(). Guard against stop() being
  // invoked on a connector whose start() never got that far, which would otherwise throw a
  // NullPointerException here.
  if (telemetryClient != null) {
    telemetryClient.reportKafkaConnectStop(connectorStartTime);
  }
}
// TODO (post GA): override reconfigure(java.util.Map props)
// The default implementation shuts down all external network connections.
// We can make it more efficient by identifying configuration changes:
// creating new snowflake internal stages, new snowflake tables, and new pipes
// for newly added topics; cleaning up stages for topics that are not in the
// new configuration; and cleaning up pipes for partitions that are not in the
// new configuration.
/** @return Sink task class */
@Override
public Class extends Task> taskClass() {
return SnowflakeSinkTask.class;
}
/**
* taskConfigs method returns a set of configurations for SinkTasks based on the current
* configuration, producing at most 'maxTasks' configurations
*
* @param maxTasks maximum number of SinkTasks for this instance of SnowflakeSinkConnector
* @return a list containing 'maxTasks' copies of the configuration
*/
@Override
public List