package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.BOOLEAN_VALIDATOR;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.CUSTOM_SNOWFLAKE_CONVERTERS;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ERRORS_LOG_ENABLE_CONFIG;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.ErrorTolerance;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.INGESTION_METHOD_OPT;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.KEY_CONVERTER_CONFIG_FIELD;
import static com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig.VALUE_CONVERTER_CONFIG_FIELD;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.BufferThreshold;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import net.snowflake.ingest.utils.Constants;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.record.DefaultRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* Utility class with helper methods for streaming-related ingestion. */
public class StreamingUtils {
  private static final Logger LOGGER = LoggerFactory.getLogger(StreamingUtils.class);

  // Streaming Ingest API related fields

  protected static final Duration DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY = Duration.ofSeconds(1);

  protected static final int MAX_GET_OFFSET_TOKEN_RETRIES = 3;
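
  // The two constants above bound the offset-token lookup performed against a streaming
  // channel: a fixed wait between attempts and a maximum number of retries.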

  // Buffer-related defaults and minimums that can be set at the connector level by
  // clients/customers.
  public static final long STREAMING_BUFFER_FLUSH_TIME_MINIMUM_SEC =
      Duration.ofSeconds(1).getSeconds();

  public static final long STREAMING_BUFFER_FLUSH_TIME_DEFAULT_SEC =
      Duration.ofSeconds(10).getSeconds();

  protected static final long STREAMING_BUFFER_COUNT_RECORDS_DEFAULT = 10_000L;
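
  // Flush time, record count, and buffer size act as independent triggers: buffered records
  // are flushed when whichever threshold is reached first. Permissible ranges are enforced
  // via BufferThreshold.validateBufferThreshold (called during config validation below).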

  /**
   * Keeping this default as ~ 20MB.
   *
   * <p>The logic behind this optimum value: we will do gzip compression and JSON to UTF
   * conversion, which will account for almost 95% compression.
   *
   * <p>1 MB is an ideal size for streaming ingestion, and 95% compression of 20MB = 1MB.
   */
  protected static final long STREAMING_BUFFER_BYTES_DEFAULT = 20_000_000;

  private static final Set<String> DISALLOWED_CONVERTERS_STREAMING = CUSTOM_SNOWFLAKE_CONVERTERS;

  private static final String STRING_CONVERTER_KEYWORD = "StringConverter";
  private static final String BYTE_ARRAY_CONVERTER_KEYWORD = "ByteArrayConverter";

  // Excluding key, value and headers: 5 bytes length + 10 bytes timestamp + 5 bytes offset + 1
  // byte attributes. (This is not for record metadata; this is before we transform to
  // Snowflake-understood JSON.)
  // This is the overhead size used when calculating the size of buffered Kafka records.
  public static final int MAX_RECORD_OVERHEAD_BYTES = DefaultRecord.MAX_RECORD_OVERHEAD;

  // TODO: Modify STREAMING_CONSTANT to Constants. after SNOW-352846 is released
  public static final String STREAMING_CONSTANT_AUTHORIZATION_TYPE = "authorization_type";
  public static final String STREAMING_CONSTANT_JWT = "JWT";
  public static final String STREAMING_CONSTANT_OAUTH = "OAuth";
  public static final String STREAMING_CONSTANT_OAUTH_CLIENT_ID = "oauth_client_id";
  public static final String STREAMING_CONSTANT_OAUTH_CLIENT_SECRET = "oauth_client_secret";
  public static final String STREAMING_CONSTANT_OAUTH_REFRESH_TOKEN = "oauth_refresh_token";

  /* Maps the streaming client's property keys to what we got from the Snowflake KC config file. */
  public static Map<String, String> convertConfigForStreamingClient(
      Map<String, String> connectorConfig) {
    Map<String, String> streamingPropertiesMap = new HashMap<>();

    connectorConfig.computeIfPresent(
        Utils.SF_URL,
        (key, value) -> {
          streamingPropertiesMap.put(Constants.ACCOUNT_URL, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_ROLE,
        (key, value) -> {
          streamingPropertiesMap.put(Constants.ROLE, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_USER,
        (key, value) -> {
          streamingPropertiesMap.put(Constants.USER, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_AUTHENTICATOR,
        (key, value) -> {
          if (value.equals(Utils.SNOWFLAKE_JWT)) {
            streamingPropertiesMap.put(
                STREAMING_CONSTANT_AUTHORIZATION_TYPE, STREAMING_CONSTANT_JWT);
          }
          if (value.equals(Utils.OAUTH)) {
            streamingPropertiesMap.put(
                STREAMING_CONSTANT_AUTHORIZATION_TYPE, STREAMING_CONSTANT_OAUTH);
          }
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_PRIVATE_KEY,
        (key, value) -> {
          streamingPropertiesMap.put(Constants.PRIVATE_KEY, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.PRIVATE_KEY_PASSPHRASE,
        (key, value) -> {
          if (!value.isEmpty()) {
            streamingPropertiesMap.put(Constants.PRIVATE_KEY_PASSPHRASE, value);
          }
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_OAUTH_CLIENT_ID,
        (key, value) -> {
          streamingPropertiesMap.put(STREAMING_CONSTANT_OAUTH_CLIENT_ID, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_OAUTH_CLIENT_SECRET,
        (key, value) -> {
          streamingPropertiesMap.put(STREAMING_CONSTANT_OAUTH_CLIENT_SECRET, value);
          return value;
        });

    connectorConfig.computeIfPresent(
        Utils.SF_OAUTH_REFRESH_TOKEN,
        (key, value) -> {
          streamingPropertiesMap.put(STREAMING_CONSTANT_OAUTH_REFRESH_TOKEN, value);
          return value;
        });

    return streamingPropertiesMap;
  }

  /* Returns true if the SF connector config has errors.tolerance = ALL */
  public static boolean tolerateErrors(Map<String, String> sfConnectorConfig) {
    String errorsTolerance =
        sfConnectorConfig.getOrDefault(ERRORS_TOLERANCE_CONFIG, ErrorTolerance.NONE.toString());
    return ErrorTolerance.valueOf(errorsTolerance.toUpperCase()).equals(ErrorTolerance.ALL);
  }

  /* Returns true if the connector config has errors.log.enable = true */
  public static boolean logErrors(Map<String, String> sfConnectorConfig) {
    return Boolean.parseBoolean(sfConnectorConfig.getOrDefault(ERRORS_LOG_ENABLE_CONFIG, "false"));
  }

  /* Returns the DLQ topic name if the connector config has errors.deadletterqueue.topic.name set */
  public static String getDlqTopicName(Map<String, String> sfConnectorConfig) {
    return sfConnectorConfig.getOrDefault(ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG, "");
  }

  /**
   * Validates the Snowpipe Streaming related config provided by the customer's config.
   *
   * @param inputConfig config given in the connector JSON file
   * @return map of invalid parameters
   */
  public static ImmutableMap<String, String> validateStreamingSnowpipeConfig(
      final Map<String, String> inputConfig) {
    Map<String, String> invalidParams = new HashMap<>();

    // For snowpipe_streaming, the role should be non-empty
    if (inputConfig.containsKey(INGESTION_METHOD_OPT)) {
      try {
        // This throws an exception if the config value is invalid.
        IngestionMethodConfig.VALIDATOR.ensureValid(
            INGESTION_METHOD_OPT, inputConfig.get(INGESTION_METHOD_OPT));
        if (inputConfig
            .get(INGESTION_METHOD_OPT)
            .equalsIgnoreCase(IngestionMethodConfig.SNOWPIPE_STREAMING.toString())) {
          // Check if buffer thresholds are within the permissible range
          invalidParams.putAll(
              BufferThreshold.validateBufferThreshold(
                  inputConfig, IngestionMethodConfig.SNOWPIPE_STREAMING));

          invalidParams.putAll(validateConfigConverters(KEY_CONVERTER_CONFIG_FIELD, inputConfig));
          invalidParams.putAll(validateConfigConverters(VALUE_CONVERTER_CONFIG_FIELD, inputConfig));

          // Validate that the Snowflake role is present
          if (!inputConfig.containsKey(Utils.SF_ROLE)
              || Strings.isNullOrEmpty(inputConfig.get(Utils.SF_ROLE))) {
            invalidParams.put(
                Utils.SF_ROLE,
                Utils.formatString(
                    "Config:{} should be present if ingestionMethod is:{}",
                    Utils.SF_ROLE,
                    inputConfig.get(INGESTION_METHOD_OPT)));
          }

          /*
           * Only checked in streaming since we utilize these values before sending records
           * to the DLQ or writing them to the log file.
           */
          if (inputConfig.containsKey(ERRORS_TOLERANCE_CONFIG)) {
            SnowflakeSinkConnectorConfig.ErrorTolerance.VALIDATOR.ensureValid(
                ERRORS_TOLERANCE_CONFIG, inputConfig.get(ERRORS_TOLERANCE_CONFIG));
          }
          if (inputConfig.containsKey(ERRORS_LOG_ENABLE_CONFIG)) {
            BOOLEAN_VALIDATOR.ensureValid(
                ERRORS_LOG_ENABLE_CONFIG, inputConfig.get(ERRORS_LOG_ENABLE_CONFIG));
          }

          // Validate the schematization config for Snowpipe Streaming
          invalidParams.putAll(validateSchematizationConfig(inputConfig));
        }
      } catch (ConfigException exception) {
        invalidParams.put(
            INGESTION_METHOD_OPT,
            Utils.formatString(
                "Kafka config:{} error:{}", INGESTION_METHOD_OPT, exception.getMessage()));
      }
    }

    return ImmutableMap.copyOf(invalidParams);
  }

  /**
   * Validates that the key and value converters are allowed values if {@link
   * IngestionMethodConfig#SNOWPIPE_STREAMING} is used.
   *
   * @return map of invalid parameters
   */
  private static Map<String, String> validateConfigConverters(
      final String inputConfigConverterField, Map<String, String> inputConfig) {
    Map<String, String> invalidParams = new HashMap<>();

    if (inputConfig.containsKey(inputConfigConverterField)
        && DISALLOWED_CONVERTERS_STREAMING.contains(inputConfig.get(inputConfigConverterField))) {
      invalidParams.put(
          inputConfigConverterField,
          Utils.formatString(
              "Config:{} has provided value:{}. If ingestionMethod is:{}, Snowflake Custom"
                  + " Converters are not allowed. \n"
                  + "Invalid Converters:{}",
              inputConfigConverterField,
              inputConfig.get(inputConfigConverterField),
              IngestionMethodConfig.SNOWPIPE_STREAMING,
              Iterables.toString(DISALLOWED_CONVERTERS_STREAMING)));
    }

    return invalidParams;
  }

  /**
   * Validates that the configs are allowed values when schematization is enabled.
   *
   * @return a map of invalid params
   */
  private static Map<String, String> validateSchematizationConfig(
      Map<String, String> inputConfig) {
    Map<String, String> invalidParams = new HashMap<>();

    if (inputConfig.containsKey(SnowflakeSinkConnectorConfig.ENABLE_SCHEMATIZATION_CONFIG)) {
      BOOLEAN_VALIDATOR.ensureValid(
          SnowflakeSinkConnectorConfig.ENABLE_SCHEMATIZATION_CONFIG,
          inputConfig.get(SnowflakeSinkConnectorConfig.ENABLE_SCHEMATIZATION_CONFIG));

      if (Boolean.parseBoolean(
              inputConfig.get(SnowflakeSinkConnectorConfig.ENABLE_SCHEMATIZATION_CONFIG))
          && inputConfig.get(VALUE_CONVERTER_CONFIG_FIELD) != null
          && (inputConfig.get(VALUE_CONVERTER_CONFIG_FIELD).contains(STRING_CONVERTER_KEYWORD)
              || inputConfig
                  .get(VALUE_CONVERTER_CONFIG_FIELD)
                  .contains(BYTE_ARRAY_CONVERTER_KEYWORD))) {
        invalidParams.put(
            inputConfig.get(VALUE_CONVERTER_CONFIG_FIELD),
            Utils.formatString(
                "The value converter:{} is not supported with schematization.",
                inputConfig.get(VALUE_CONVERTER_CONFIG_FIELD)));
      }
    }

    return invalidParams;
  }
}
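
Below is a minimal, hypothetical usage sketch; it is not part of the connector source. It assumes the connector jar is on the classpath and that the referenced classes (Utils, SnowflakeSinkConnectorConfig, IngestionMethodConfig) are publicly accessible; the account URL, user, role, and DLQ topic values are placeholders invented for illustration, and config keys are referenced through the public constants used above rather than raw strings.

import com.google.common.collect.ImmutableMap;
import com.snowflake.kafka.connector.SnowflakeSinkConnectorConfig;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.streaming.IngestionMethodConfig;
import com.snowflake.kafka.connector.internal.streaming.StreamingUtils;
import java.util.HashMap;
import java.util.Map;

public class StreamingUtilsSketch {
  public static void main(String[] args) {
    Map<String, String> connectorConfig = new HashMap<>();
    // Placeholder connection values; real deployments read these from the connector JSON.
    connectorConfig.put(Utils.SF_URL, "myaccount.snowflakecomputing.com");
    connectorConfig.put(Utils.SF_USER, "kafka_connector_user");
    connectorConfig.put(Utils.SF_ROLE, "kafka_connector_role"); // required for streaming
    connectorConfig.put(
        SnowflakeSinkConnectorConfig.INGESTION_METHOD_OPT,
        IngestionMethodConfig.SNOWPIPE_STREAMING.toString());
    connectorConfig.put(SnowflakeSinkConnectorConfig.ERRORS_TOLERANCE_CONFIG, "all");
    connectorConfig.put(SnowflakeSinkConnectorConfig.ERRORS_LOG_ENABLE_CONFIG, "true");
    connectorConfig.put(
        SnowflakeSinkConnectorConfig.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG, "my_dlq_topic");

    // An empty map means the streaming config passed validation.
    ImmutableMap<String, String> invalid =
        StreamingUtils.validateStreamingSnowpipeConfig(connectorConfig);
    if (!invalid.isEmpty()) {
      throw new IllegalArgumentException("Invalid streaming config: " + invalid);
    }

    // Translate connector-level keys into the property names the streaming client expects.
    Map<String, String> clientProps =
        StreamingUtils.convertConfigForStreamingClient(connectorConfig);

    // Error-handling behavior derived from the same config map.
    boolean tolerate = StreamingUtils.tolerateErrors(connectorConfig); // true ("all")
    boolean logEnabled = StreamingUtils.logErrors(connectorConfig); // true
    String dlqTopic = StreamingUtils.getDlqTopicName(connectorConfig); // "my_dlq_topic"
  }
}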