All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.delta.flink.source.RowDataBoundedDeltaSourceBuilder Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
package io.delta.flink.source;

import java.util.Arrays;
import java.util.List;

import io.delta.flink.source.internal.builder.BoundedDeltaSourceBuilder;
import io.delta.flink.source.internal.builder.DeltaBulkFormat;
import io.delta.flink.source.internal.builder.RowDataFormat;
import io.delta.flink.source.internal.enumerator.supplier.BoundedSnapshotSupplierFactory;
import io.delta.flink.source.internal.utils.SourceSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;
import static io.delta.flink.source.internal.DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION;

/**
 * A builder class for {@link DeltaSource} for a stream of {@link RowData} where the created source
 * instance will operate in Bounded mode.
 * 

* For most common use cases use {@link DeltaSource#forBoundedRowData} utility method to instantiate * the source. After instantiation of this builder you can either call {@link * RowDataBoundedDeltaSourceBuilder#build()} method to get the instance of a {@link DeltaSource} or * configure additional options using builder's API. */ public class RowDataBoundedDeltaSourceBuilder extends BoundedDeltaSourceBuilder { RowDataBoundedDeltaSourceBuilder( Path tablePath, Configuration hadoopConfiguration, BoundedSnapshotSupplierFactory snapshotSupplierFactory) { super(tablePath, hadoopConfiguration, snapshotSupplierFactory); } ////////////////////////////////////////////////////////// /// We have to override methods from base class /// /// to include them in javadoc generated by sbt-unidoc /// ////////////////////////////////////////////////////////// /** * Specifies a {@link List} of column names that should be read from Delta table. If this method * is not used, Source will read all columns from Delta table. *

* If provided List is null or contains null, empty or blank elements it will throw a * {@code DeltaSourceValidationException} by builder after calling {@code build()} method. * * @param columnNames column names that should be read. */ @Override public RowDataBoundedDeltaSourceBuilder columnNames(List columnNames) { return super.columnNames(columnNames); } /** * Specifies an array of column names that should be read from Delta table. If this method * is not used, Source will read all columns from Delta table. *

* If provided List is null or contains null, empty or blank elements it will throw a * {@code DeltaSourceValidationException} by builder after calling {@code build()} method. * * @param columnNames column names that should be read. */ public RowDataBoundedDeltaSourceBuilder columnNames(String... columnNames) { return super.columnNames(Arrays.asList(columnNames)); } /** * Sets value of "versionAsOf" option. With this option we will load the given table version and * read from it. * *

* This option is mutually exclusive with {@link #timestampAsOf(String)} option. * * @param snapshotVersion Delta table version to time travel to. */ @Override public RowDataBoundedDeltaSourceBuilder versionAsOf(long snapshotVersion) { return super.versionAsOf(snapshotVersion); } /** * Sets value of "timestampAsOf" option. With this option we will load the latest table version * that was generated at or before the given timestamp. *

* This option is mutually exclusive with {@link #versionAsOf(long)} option. * * @param snapshotTimestamp The timestamp we should time travel to. Supported formats are: *

    *
  • 2022-02-24
  • *
  • 2022-02-24 04:55:00
  • *
  • 2022-02-24 04:55:00.001
  • *
  • 2022-02-24T04:55:00
  • *
  • 2022-02-24T04:55:00.001
  • *
  • 2022-02-24T04:55:00.001Z
  • *
*/ @Override public RowDataBoundedDeltaSourceBuilder timestampAsOf(String snapshotTimestamp) { return super.timestampAsOf(snapshotTimestamp); } /** * Sets a configuration option. * * @param optionName Option name to set. * @param optionValue Option {@link String} value to set. */ @Override public RowDataBoundedDeltaSourceBuilder option(String optionName, String optionValue) { return super.option(optionName, optionValue); } /** * Sets a configuration option. * * @param optionName Option name to set. * @param optionValue Option boolean value to set. */ @Override public RowDataBoundedDeltaSourceBuilder option(String optionName, boolean optionValue) { return super.option(optionName, optionValue); } /** * Sets a configuration option. * * @param optionName Option name to set. * @param optionValue Option int value to set. */ @Override public RowDataBoundedDeltaSourceBuilder option(String optionName, int optionValue) { return super.option(optionName, optionValue); } /** * Sets a configuration option. * * @param optionName Option name to set. * @param optionValue Option long value to set. */ @Override public RowDataBoundedDeltaSourceBuilder option(String optionName, long optionValue) { return super.option(optionName, optionValue); } /** * Creates an instance of {@link DeltaSource} for a stream of {@link RowData}. Created source * will work in Bounded mode, meaning it will read the content of the configured Delta snapshot * at the fixed version, ignoring all changes done to this table after starting this source. * *

* This method can throw {@code DeltaSourceValidationException} in case of invalid arguments * passed to Delta source builder. * * @return New {@link DeltaSource} instance. */ @Override @SuppressWarnings("unchecked") public DeltaSource build() { validate(); // In this step, the Delta table schema discovery is made. // We load the snapshot corresponding to the latest/versionAsOf/timestampAsOf commit. // We are using this snapshot to extract the metadata and discover table's column names // and data types. SourceSchema sourceSchema = getSourceSchema(); sourceConfiguration.addOption( LOADED_SCHEMA_SNAPSHOT_VERSION, sourceSchema.getSnapshotVersion() ); DeltaBulkFormat format = RowDataFormat.builder( RowType.of(sourceSchema.getColumnTypes(), sourceSchema.getColumnNames()), hadoopConfiguration) .partitionColumns(sourceSchema.getPartitionColumns()) .build(); return new DeltaSource<>( tablePath, format, DEFAULT_BOUNDED_SPLIT_ENUMERATOR_PROVIDER, hadoopConfiguration, sourceConfiguration); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy