// net.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel (Maven / Gradle / Ivy)
/*
* Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
*/
package net.snowflake.ingest.streaming;
import java.time.ZoneId;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import javax.annotation.Nullable;
import net.snowflake.ingest.streaming.internal.ColumnProperties;
/**
* A logical partition that represents a connection to a single Snowflake table, data will be
* ingested into the channel, and then flushed to Snowflake table periodically in the background.
*
* Channels are identified by their name and only one channel with the same name may ingest data
* at the same time. When a new channel is opened, all previously opened channels with the same name
* are invalidated (this applies to the table globally, not just in a single JVM). In order to
* ingest data from multiple threads/clients/applications, we recommend opening multiple channels,
* each with a different name.
*
*
* <p>Thread safety note: Implementations of this interface are required to be thread safe.
*/
public interface SnowflakeStreamingIngestChannel {
/**
* Get the fully qualified channel name.
*
* @return fully qualified name of the channel, in the format
*     {@code dbName.schemaName.tableName.channelName}
*/
String getFullyQualifiedName();
/**
* Get the name of the channel (the last component of {@link #getFullyQualifiedName()}).
*
* @return name of the channel
*/
String getName();
/**
* Get the name of the database the channel's target table belongs to.
*
* @return name of the database
*/
String getDBName();
/**
* Get the name of the schema the channel's target table belongs to.
*
* @return name of the schema
*/
String getSchemaName();
/**
* Get the name of the table this channel ingests into.
*
* @return name of the table
*/
String getTableName();
/**
* Get the fully qualified name of the table that the channel belongs to.
*
* @return fully qualified table name, in the format {@code dbName.schemaName.tableName}
*/
String getFullyQualifiedTableName();
/**
* Check whether the channel is still valid. Per the class-level documentation, a channel is
* invalidated when another channel with the same name is opened.
*
* @return a boolean which indicates whether the channel is valid
*/
boolean isValid();
/**
* Check whether the channel has been closed (see {@link #close()}).
*
* @return a boolean which indicates whether the channel is closed
*/
boolean isClosed();
/**
* Close the channel. This function will make sure all the data in this channel is committed
* before the returned future completes.
*
* @return a completable future which will be completed when the channel is closed
*/
// Raw CompletableFuture was an HTML-stripping artifact; the type parameter is restored.
// Erasure is unchanged, so this remains binary- and source-compatible with callers.
CompletableFuture<Void> close();
/**
* Close the channel. This function will make sure all the data in this channel is committed
* before the returned future completes.
*
* <p>Note that calling this with {@code drop=true} will delete the offset token and other state
* from Snowflake servers unless the channel has already been opened in another client. So only
* use it if you are completely done ingesting data for this channel. If you open a channel with
* the same name in the future, it will behave like a new channel.
*
* @param drop if true, the channel will be dropped after all data is successfully committed
* @return a completable future which will be completed when the channel is closed
*/
// Raw CompletableFuture was an HTML-stripping artifact; type parameter restored (same erasure).
CompletableFuture<Void> close(boolean drop);
/**
* Insert one row into the channel. The row is represented using a Map where the key is the
* column name and the value is the column value. The following list summarizes supported value
* types per Snowflake column type:
*
* <ul>
*   <li>CHAR, VARCHAR: String; primitive data types (int, boolean, char, ...)
*   <li>BINARY: byte[]; String (hex-encoded)
*   <li>NUMBER, FLOAT: numeric types (BigInteger, BigDecimal, byte, int, double, ...); String
*   <li>BOOLEAN: boolean; numeric types (BigInteger, BigDecimal, byte, int, double, ...); String
*       (see Snowflake boolean conversion details)
*   <li>TIME: {@link java.time.LocalTime}; {@link java.time.OffsetTime}; String in one of the
*       following formats: {@link java.time.format.DateTimeFormatter#ISO_LOCAL_TIME}, {@link
*       java.time.format.DateTimeFormatter#ISO_OFFSET_TIME}, or integer-stored time (see
*       Snowflake Docs for more details)
*   <li>DATE: {@link java.time.LocalDate}; {@link java.time.LocalDateTime}; {@link
*       java.time.OffsetDateTime}; {@link java.time.ZonedDateTime}; {@link java.time.Instant};
*       String in one of the following formats: {@link
*       java.time.format.DateTimeFormatter#ISO_LOCAL_DATE}, {@link
*       java.time.format.DateTimeFormatter#ISO_LOCAL_DATE_TIME}, {@link
*       java.time.format.DateTimeFormatter#ISO_OFFSET_DATE_TIME}, {@link
*       java.time.format.DateTimeFormatter#ISO_ZONED_DATE_TIME}, or integer-stored date (see
*       Snowflake Docs for more details)
*   <li>TIMESTAMP_NTZ, TIMESTAMP_LTZ, TIMESTAMP_TZ: same Java types and String formats as DATE,
*       plus integer-stored timestamps (see Snowflake Docs for more details). For TIMESTAMP_LTZ
*       and TIMESTAMP_TZ, all input without timezone will be by default interpreted in the
*       timezone "America/Los_Angeles". This can be changed by calling {@link
*       net.snowflake.ingest.streaming.OpenChannelRequest.OpenChannelRequestBuilder#setDefaultTimezone(ZoneId)}.
*   <li>VARIANT, ARRAY: String (must be a valid JSON value); primitive data types and their
*       arrays; BigInteger; BigDecimal; {@link java.time.LocalDate}; {@link
*       java.time.LocalDateTime}; {@link java.time.OffsetDateTime}; {@link
*       java.time.ZonedDateTime}; {@code Map<String, T>}, {@code T[]}, or {@code List<T>} where
*       T is a valid VARIANT type
*   <li>OBJECT: String (must be a valid JSON object); {@code Map<String, T>} where T is a valid
*       variant type
*   <li>GEOGRAPHY, GEOMETRY: not supported
* </ul>
*
* @param row object data to write. For predictable results, we recommend not to concurrently
*     modify the input row data.
* @param offsetToken offset of given row, used for replay in case of failures. It could be null
*     if you don't plan on replaying or can't replay
* @return insert response that possibly contains errors because of insertion failures
*/
// Raw Map was an HTML-stripping artifact (cf. the surviving "Map<String, T>" doc text above);
// the Map<String, Object> type parameters are restored — same erasure, compatible with callers.
InsertValidationResponse insertRow(Map<String, Object> row, @Nullable String offsetToken);
/**
* Insert a batch of rows into the channel, each row is represented using Map where the key is
* column name and the value is a row of data. See {@link
* SnowflakeStreamingIngestChannel#insertRow(Map, String)} for more information about accepted
* values.
*
* @param rows object data to write
* @param startOffsetToken start offset of the batch/row-set
* @param endOffsetToken end offset of the batch/row-set, used for replay in case of failures, *
* It could be null if you don't plan on replaying or can't replay
* @return insert response that possibly contains errors because of insertion failures
*/
InsertValidationResponse insertRows(
Iterable