/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.flink.source;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.apache.flink.annotation.Experimental;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.Source;
import org.apache.flink.api.connector.source.SourceReader;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.api.connector.source.SplitEnumerator;
import org.apache.flink.api.connector.source.SplitEnumeratorContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Preconditions;
import org.apache.iceberg.BaseMetadataTable;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.flink.FlinkConfigOptions;
import org.apache.iceberg.flink.FlinkReadConf;
import org.apache.iceberg.flink.FlinkReadOptions;
import org.apache.iceberg.flink.FlinkSchemaUtil;
import org.apache.iceberg.flink.TableLoader;
import org.apache.iceberg.flink.source.assigner.OrderedSplitAssignerFactory;
import org.apache.iceberg.flink.source.assigner.SimpleSplitAssignerFactory;
import org.apache.iceberg.flink.source.assigner.SplitAssigner;
import org.apache.iceberg.flink.source.assigner.SplitAssignerFactory;
import org.apache.iceberg.flink.source.enumerator.ContinuousIcebergEnumerator;
import org.apache.iceberg.flink.source.enumerator.ContinuousSplitPlanner;
import org.apache.iceberg.flink.source.enumerator.ContinuousSplitPlannerImpl;
import org.apache.iceberg.flink.source.enumerator.IcebergEnumeratorState;
import org.apache.iceberg.flink.source.enumerator.IcebergEnumeratorStateSerializer;
import org.apache.iceberg.flink.source.enumerator.StaticIcebergEnumerator;
import org.apache.iceberg.flink.source.reader.ColumnStatsWatermarkExtractor;
import org.apache.iceberg.flink.source.reader.IcebergSourceReader;
import org.apache.iceberg.flink.source.reader.IcebergSourceReaderMetrics;
import org.apache.iceberg.flink.source.reader.MetaDataReaderFunction;
import org.apache.iceberg.flink.source.reader.ReaderFunction;
import org.apache.iceberg.flink.source.reader.RowDataReaderFunction;
import org.apache.iceberg.flink.source.reader.SerializableRecordEmitter;
import org.apache.iceberg.flink.source.reader.SplitWatermarkExtractor;
import org.apache.iceberg.flink.source.split.IcebergSourceSplit;
import org.apache.iceberg.flink.source.split.IcebergSourceSplitSerializer;
import org.apache.iceberg.flink.source.split.SerializableComparator;
import org.apache.iceberg.flink.source.split.SplitComparators;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.util.ThreadPools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

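/**
 * Flink FLIP-27 {@link Source} for reading Iceberg tables, producing {@link RowData} by default or
 * a custom record type through {@link Builder#readerFunction(ReaderFunction)}.
 *
 * <p>A minimal usage sketch, not an authoritative example: the warehouse path is a placeholder,
 * and {@code env} is assumed to be an existing {@code StreamExecutionEnvironment}.
 *
 * <pre>{@code
 * IcebergSource<RowData> source =
 *     IcebergSource.forRowData()
 *         .tableLoader(TableLoader.fromHadoopTable("hdfs://nn:8020/warehouse/db/table"))
 *         .assignerFactory(new SimpleSplitAssignerFactory())
 *         .streaming(true)
 *         .monitorInterval(Duration.ofSeconds(60))
 *         .build();
 *
 * DataStream<RowData> stream =
 *     env.fromSource(
 *         source,
 *         WatermarkStrategy.noWatermarks(),
 *         "iceberg-source",
 *         TypeInformation.of(RowData.class));
 * }</pre>
 */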
@Experimental
public class IcebergSource<T> implements Source<T, IcebergSourceSplit, IcebergEnumeratorState> {
  private static final Logger LOG = LoggerFactory.getLogger(IcebergSource.class);

  // This table loader can be closed, and it is only safe to use this instance for resource
  // independent information (e.g. a table name). Copies of this are required to avoid lifecycle
  // management conflicts with the user provided table loader. e.g. a copy of this is required for
  // split planning, which uses the underlying io, and should be closed after split planning is
  // complete.
  private final TableLoader tableLoader;
  private final ScanContext scanContext;
  private final ReaderFunction<T> readerFunction;
  private final SplitAssignerFactory assignerFactory;
  private final SerializableComparator<IcebergSourceSplit> splitComparator;
  private final SerializableRecordEmitter<T> emitter;
  private final String tableName;

  IcebergSource(
      TableLoader tableLoader,
      ScanContext scanContext,
      ReaderFunction<T> readerFunction,
      SplitAssignerFactory assignerFactory,
      SerializableComparator<IcebergSourceSplit> splitComparator,
      Table table,
      SerializableRecordEmitter<T> emitter) {
    Preconditions.checkNotNull(tableLoader, "tableLoader is required.");
    Preconditions.checkNotNull(readerFunction, "readerFunction is required.");
    Preconditions.checkNotNull(assignerFactory, "assignerFactory is required.");
    Preconditions.checkNotNull(table, "table is required.");
    this.tableLoader = tableLoader;
    this.scanContext = scanContext;
    this.readerFunction = readerFunction;
    this.assignerFactory = assignerFactory;
    this.splitComparator = splitComparator;
    this.emitter = emitter;
    this.tableName = table.name();
  }

  String name() {
    return "IcebergSource-" + tableName;
  }

  private String planningThreadName() {
    // Ideally, operatorId should be used as the threadPoolName as Flink guarantees its uniqueness
    // within a job. SplitEnumeratorContext doesn't expose the OperatorCoordinator.Context, which
    // would contain the OperatorID. Need to discuss with Flink community whether it is ok to expose
    // a public API like the protected method "OperatorCoordinator.Context getCoordinatorContext()"
    // from SourceCoordinatorContext implementation. For now, <table name>-<random UUID> is used as
    // the unique thread pool name.
    return tableName + "-" + UUID.randomUUID();
  }

  private List<IcebergSourceSplit> planSplitsForBatch(String threadName) {
    ExecutorService workerPool =
        ThreadPools.newWorkerPool(threadName, scanContext.planParallelism());
    try (TableLoader loader = tableLoader.clone()) {
      loader.open();
      List<IcebergSourceSplit> splits =
          FlinkSplitPlanner.planIcebergSourceSplits(loader.loadTable(), scanContext, workerPool);
      LOG.info(
          "Discovered {} splits from table {} during job initialization", splits.size(), tableName);
      return splits;
    } catch (IOException e) {
      throw new UncheckedIOException("Failed to close table loader", e);
    } finally {
      workerPool.shutdown();
    }
  }

  @Override
  public Boundedness getBoundedness() {
    return scanContext.isStreaming() ? Boundedness.CONTINUOUS_UNBOUNDED : Boundedness.BOUNDED;
  }

  @Override
  public SourceReader<T, IcebergSourceSplit> createReader(SourceReaderContext readerContext) {
    IcebergSourceReaderMetrics metrics =
        new IcebergSourceReaderMetrics(readerContext.metricGroup(), tableName);
    return new IcebergSourceReader<>(
        emitter, metrics, readerFunction, splitComparator, readerContext);
  }

  @Override
  public SplitEnumerator<IcebergSourceSplit, IcebergEnumeratorState> createEnumerator(
      SplitEnumeratorContext<IcebergSourceSplit> enumContext) {
    return createEnumerator(enumContext, null);
  }

  @Override
  public SplitEnumerator<IcebergSourceSplit, IcebergEnumeratorState> restoreEnumerator(
      SplitEnumeratorContext<IcebergSourceSplit> enumContext, IcebergEnumeratorState enumState) {
    return createEnumerator(enumContext, enumState);
  }

  @Override
  public SimpleVersionedSerializer<IcebergSourceSplit> getSplitSerializer() {
    return new IcebergSourceSplitSerializer(scanContext.caseSensitive());
  }

  @Override
  public SimpleVersionedSerializer<IcebergEnumeratorState> getEnumeratorCheckpointSerializer() {
    return new IcebergEnumeratorStateSerializer(scanContext.caseSensitive());
  }

  private SplitEnumerator<IcebergSourceSplit, IcebergEnumeratorState> createEnumerator(
      SplitEnumeratorContext<IcebergSourceSplit> enumContext,
      @Nullable IcebergEnumeratorState enumState) {
    SplitAssigner assigner;
    if (enumState == null) {
      assigner = assignerFactory.createAssigner();
    } else {
      LOG.info(
          "Iceberg source restored {} splits from state for table {}",
          enumState.pendingSplits().size(),
          tableName);
      assigner = assignerFactory.createAssigner(enumState.pendingSplits());
    }
    if (scanContext.isStreaming()) {
      ContinuousSplitPlanner splitPlanner =
          new ContinuousSplitPlannerImpl(tableLoader, scanContext, planningThreadName());
      return new ContinuousIcebergEnumerator(
          enumContext, assigner, scanContext, splitPlanner, enumState);
    } else {
      if (enumState == null) {
        // Only do scan planning if nothing is restored from checkpoint state
        List<IcebergSourceSplit> splits = planSplitsForBatch(planningThreadName());
        assigner.onDiscoveredSplits(splits);
      }

      return new StaticIcebergEnumerator(enumContext, assigner);
    }
  }

  public static <T> Builder<T> builder() {
    return new Builder<>();
  }

  public static Builder<RowData> forRowData() {
    return new Builder<>();
  }

  public static class Builder<T> {
    private TableLoader tableLoader;
    private Table table;
    private SplitAssignerFactory splitAssignerFactory;
    private SerializableComparator<IcebergSourceSplit> splitComparator;
    private ReaderFunction<T> readerFunction;
    private ReadableConfig flinkConfig = new Configuration();
    private final ScanContext.Builder contextBuilder = ScanContext.builder();
    private TableSchema projectedFlinkSchema;
    private Boolean exposeLocality;

    private final Map<String, String> readOptions = Maps.newHashMap();

    Builder() {}

    public Builder<T> tableLoader(TableLoader loader) {
      this.tableLoader = loader;
      return this;
    }

    public Builder<T> table(Table newTable) {
      this.table = newTable;
      return this;
    }

    public Builder<T> assignerFactory(SplitAssignerFactory assignerFactory) {
      this.splitAssignerFactory = assignerFactory;
      return this;
    }

    public Builder<T> splitComparator(
        SerializableComparator<IcebergSourceSplit> newSplitComparator) {
      this.splitComparator = newSplitComparator;
      return this;
    }

    public Builder<T> readerFunction(ReaderFunction<T> newReaderFunction) {
      this.readerFunction = newReaderFunction;
      return this;
    }

    public Builder<T> flinkConfig(ReadableConfig config) {
      this.flinkConfig = config;
      return this;
    }

    public Builder<T> caseSensitive(boolean newCaseSensitive) {
      readOptions.put(FlinkReadOptions.CASE_SENSITIVE, Boolean.toString(newCaseSensitive));
      return this;
    }

    public Builder<T> useSnapshotId(Long newSnapshotId) {
      if (newSnapshotId != null) {
        readOptions.put(FlinkReadOptions.SNAPSHOT_ID.key(), Long.toString(newSnapshotId));
      }
      return this;
    }

    public Builder<T> streamingStartingStrategy(StreamingStartingStrategy newStartingStrategy) {
      readOptions.put(FlinkReadOptions.STARTING_STRATEGY, newStartingStrategy.name());
      return this;
    }

    public Builder<T> startSnapshotTimestamp(Long newStartSnapshotTimestamp) {
      if (newStartSnapshotTimestamp != null) {
        readOptions.put(
            FlinkReadOptions.START_SNAPSHOT_TIMESTAMP.key(),
            Long.toString(newStartSnapshotTimestamp));
      }
      return this;
    }

    public Builder<T> startSnapshotId(Long newStartSnapshotId) {
      if (newStartSnapshotId != null) {
        readOptions.put(
            FlinkReadOptions.START_SNAPSHOT_ID.key(), Long.toString(newStartSnapshotId));
      }
      return this;
    }

    public Builder<T> tag(String tag) {
      readOptions.put(FlinkReadOptions.TAG.key(), tag);
      return this;
    }

    public Builder<T> branch(String branch) {
      readOptions.put(FlinkReadOptions.BRANCH.key(), branch);
      return this;
    }

    public Builder<T> startTag(String startTag) {
      readOptions.put(FlinkReadOptions.START_TAG.key(), startTag);
      return this;
    }

    public Builder<T> endTag(String endTag) {
      readOptions.put(FlinkReadOptions.END_TAG.key(), endTag);
      return this;
    }

    public Builder<T> endSnapshotId(Long newEndSnapshotId) {
      if (newEndSnapshotId != null) {
        readOptions.put(FlinkReadOptions.END_SNAPSHOT_ID.key(), Long.toString(newEndSnapshotId));
      }
      return this;
    }

    public Builder<T> asOfTimestamp(Long newAsOfTimestamp) {
      if (newAsOfTimestamp != null) {
        readOptions.put(FlinkReadOptions.AS_OF_TIMESTAMP.key(), Long.toString(newAsOfTimestamp));
      }
      return this;
    }

    public Builder<T> splitSize(Long newSplitSize) {
      if (newSplitSize != null) {
        readOptions.put(FlinkReadOptions.SPLIT_SIZE, Long.toString(newSplitSize));
      }
      return this;
    }

    public Builder<T> splitLookback(Integer newSplitLookback) {
      if (newSplitLookback != null) {
        readOptions.put(FlinkReadOptions.SPLIT_LOOKBACK, Integer.toString(newSplitLookback));
      }
      return this;
    }

    public Builder<T> splitOpenFileCost(Long newSplitOpenFileCost) {
      if (newSplitOpenFileCost != null) {
        readOptions.put(FlinkReadOptions.SPLIT_FILE_OPEN_COST, Long.toString(newSplitOpenFileCost));
      }

      return this;
    }

    public Builder<T> streaming(boolean streaming) {
      readOptions.put(FlinkReadOptions.STREAMING, Boolean.toString(streaming));
      return this;
    }

    public Builder<T> monitorInterval(Duration newMonitorInterval) {
      if (newMonitorInterval != null) {
        readOptions.put(FlinkReadOptions.MONITOR_INTERVAL, newMonitorInterval.toNanos() + " ns");
      }
      return this;
    }

    public Builder<T> nameMapping(String newNameMapping) {
      readOptions.put(TableProperties.DEFAULT_NAME_MAPPING, newNameMapping);
      return this;
    }

    public Builder<T> project(Schema newProjectedSchema) {
      this.contextBuilder.project(newProjectedSchema);
      return this;
    }

    public Builder<T> project(TableSchema newProjectedFlinkSchema) {
      this.projectedFlinkSchema = newProjectedFlinkSchema;
      return this;
    }

    public Builder<T> filters(List<Expression> newFilters) {
      this.contextBuilder.filters(newFilters);
      return this;
    }

    public Builder<T> limit(Long newLimit) {
      if (newLimit != null) {
        readOptions.put(FlinkReadOptions.LIMIT, Long.toString(newLimit));
      }
      return this;
    }

    public Builder<T> includeColumnStats(boolean newIncludeColumnStats) {
      readOptions.put(
          FlinkReadOptions.INCLUDE_COLUMN_STATS, Boolean.toString(newIncludeColumnStats));
      return this;
    }

    public Builder<T> planParallelism(int planParallelism) {
      readOptions.put(
          FlinkConfigOptions.TABLE_EXEC_ICEBERG_WORKER_POOL_SIZE.key(),
          Integer.toString(planParallelism));
      return this;
    }

    public Builder<T> exposeLocality(boolean newExposeLocality) {
      this.exposeLocality = newExposeLocality;
      return this;
    }

    public Builder<T> maxAllowedPlanningFailures(int maxAllowedPlanningFailures) {
      readOptions.put(
          FlinkReadOptions.MAX_ALLOWED_PLANNING_FAILURES_OPTION.key(),
          Integer.toString(maxAllowedPlanningFailures));
      return this;
    }

    /**
     * Set the read properties for Flink source. View the supported properties in {@link
     * FlinkReadOptions}
     */
    public Builder<T> set(String property, String value) {
      readOptions.put(property, value);
      return this;
    }

    /**
     * Set the read properties for Flink source. View the supported properties in {@link
     * FlinkReadOptions}
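     *
     * <p>A minimal sketch with illustrative values; the keys reuse the string constants already
     * used by this builder (e.g. {@code FlinkReadOptions.STREAMING}), and {@code builder} stands
     * for an {@code IcebergSource.Builder} under construction:
     *
     * <pre>{@code
     * Map<String, String> options = Maps.newHashMap();
     * options.put(FlinkReadOptions.STREAMING, "true");
     * options.put(FlinkReadOptions.SPLIT_SIZE, "134217728");
     * builder.setAll(options);
     * }</pre>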
     */
    public Builder<T> setAll(Map<String, String> properties) {
      readOptions.putAll(properties);
      return this;
    }

    /**
     * Emits watermarks once per split based on the min value of column statistics from files
     * metadata in the given split. The generated watermarks are also used for ordering the splits
     * for read. Accepted column types are timestamp/timestamptz/long. For long columns consider
     * setting {@link #watermarkColumnTimeUnit(TimeUnit)}.
     *
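     * <p>A hedged configuration sketch: {@code loader} stands for an already-configured {@link
     * TableLoader}, and {@code "event_ts"} is a placeholder for a timestamp (or long) column in
     * the table schema.
     *
     * <pre>{@code
     * IcebergSource<RowData> source =
     *     IcebergSource.forRowData()
     *         .tableLoader(loader)
     *         .watermarkColumn("event_ts")
     *         .watermarkColumnTimeUnit(TimeUnit.MILLISECONDS)
     *         .streaming(true)
     *         .build();
     * }</pre>
     *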
     * <p>Consider setting `read.split.open-file-cost` to prevent combining small files to a single
     * split when the watermark is used for watermark alignment.
     */
    public Builder<T> watermarkColumn(String columnName) {
      Preconditions.checkArgument(
          splitAssignerFactory == null,
          "Watermark column and SplitAssigner should not be set in the same source");
      readOptions.put(FlinkReadOptions.WATERMARK_COLUMN, columnName);
      return this;
    }

    /**
     * When the type of the {@link #watermarkColumn} is {@link
     * org.apache.iceberg.types.Types.LongType}, then sets the {@link TimeUnit} to convert the
     * value. The default value is {@link TimeUnit#MICROSECONDS}.
     */
    public Builder<T> watermarkColumnTimeUnit(TimeUnit timeUnit) {
      readOptions.put(FlinkReadOptions.WATERMARK_COLUMN_TIME_UNIT, timeUnit.name());
      return this;
    }

    /** @deprecated Use {@link #setAll} instead. */
    @Deprecated
    public Builder<T> properties(Map<String, String> properties) {
      readOptions.putAll(properties);
      return this;
    }

    public IcebergSource<T> build() {
      if (table == null) {
        try (TableLoader loader = tableLoader) {
          loader.open();
          this.table = tableLoader.loadTable();
        } catch (IOException e) {
          throw new UncheckedIOException(e);
        }
      }

      contextBuilder.resolveConfig(table, readOptions, flinkConfig);
      Schema icebergSchema = table.schema();
      if (projectedFlinkSchema != null) {
        contextBuilder.project(FlinkSchemaUtil.convert(icebergSchema, projectedFlinkSchema));
      }

      SerializableRecordEmitter<T> emitter = SerializableRecordEmitter.defaultEmitter();
      FlinkReadConf flinkReadConf = new FlinkReadConf(table, readOptions, flinkConfig);
      String watermarkColumn = flinkReadConf.watermarkColumn();
      TimeUnit watermarkTimeUnit = flinkReadConf.watermarkColumnTimeUnit();

      if (watermarkColumn != null) {
        // Column statistics are needed for watermark generation
        contextBuilder.includeColumnStats(Sets.newHashSet(watermarkColumn));
        SplitWatermarkExtractor watermarkExtractor =
            new ColumnStatsWatermarkExtractor(icebergSchema, watermarkColumn, watermarkTimeUnit);
        emitter = SerializableRecordEmitter.emitterWithWatermark(watermarkExtractor);
        splitAssignerFactory =
            new OrderedSplitAssignerFactory(SplitComparators.watermark(watermarkExtractor));
      }

      ScanContext context = contextBuilder.build();
      context.validate();

      if (readerFunction == null) {
        if (table instanceof BaseMetadataTable) {
          MetaDataReaderFunction rowDataReaderFunction =
              new MetaDataReaderFunction(
                  flinkConfig, table.schema(), context.project(), table.io(), table.encryption());
          this.readerFunction = (ReaderFunction<T>) rowDataReaderFunction;
        } else {
          RowDataReaderFunction rowDataReaderFunction =
              new RowDataReaderFunction(
                  flinkConfig,
                  table.schema(),
                  context.project(),
                  context.nameMapping(),
                  context.caseSensitive(),
                  table.io(),
                  table.encryption(),
                  context.filters());
          this.readerFunction = (ReaderFunction<T>) rowDataReaderFunction;
        }
      }

      if (splitAssignerFactory == null) {
        if (splitComparator == null) {
          splitAssignerFactory = new SimpleSplitAssignerFactory();
        } else {
          splitAssignerFactory = new OrderedSplitAssignerFactory(splitComparator);
        }
      }

      // Since the builder already loaded the table, pass it to the source to avoid double loading
      return new IcebergSource<>(
          tableLoader,
          context,
          readerFunction,
          splitAssignerFactory,
          splitComparator,
          table,
          emitter);
    }
  }
}