All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.delta.flink.source.internal.state.DeltaSourceSplit Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
package io.delta.flink.source.internal.state;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.connector.source.SourceSplit;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.util.CheckpointedPosition;
import org.apache.flink.core.fs.Path;
import org.apache.flink.util.StringUtils;

/**
 * A {@link SourceSplit} that represents a Parquet file, or a region of a file.
 *
 * 

The split additionally has an offset and an end, which defines the region of the file * represented by the split. For splits representing the while file, the offset is zero and the * length is the file size. * *

The split may furthermore have a "reader position", which is the checkpointed position from a * reader previously reading this split. This position is null when the split is assigned from the * enumerator to the readers, and is non-null when the reader's checkpoint their state in a file * source split. * *

This implementation extends a {@link FileSourceSplit} with Delta table partition * information

*/ public class DeltaSourceSplit extends FileSourceSplit { private static final String[] NO_HOSTS = StringUtils.EMPTY_STRING_ARRAY; /** * Map containing partition column name to partition column value mappings. This mapping is used * in scope of given Split. */ private final Map partitionValues; /** * Constructs a split with no host information and with no reader position. * * @param partitionValues The Delta partition column to partition value map that should be used * for underlying Parquet File. * @param id The unique ID of this source split. * @param filePath The path to the Parquet file that this splits represents. * @param offset The start (inclusive) of the split's rage in the Parquet file, in * bytes. * @param length The number of bytes in the split (starting from the offset) */ public DeltaSourceSplit(Map partitionValues, String id, Path filePath, long offset, long length) { this(partitionValues, id, filePath, offset, length, NO_HOSTS, null); } /** * Constructs a split with host information and no reader position. *

* The {@code hostnames} provides information about the names of the hosts is storing this range * of the file. Empty, if no host information is available. Host information is typically only * available on a specific file systems, like HDFS. * * @param partitionValues The Delta partition column to partition value map that should be used * for underlying Parquet File. * @param id The unique ID of this source split. * @param filePath The path to the Parquet file that this splits represents. * @param offset The start (inclusive) of the split's rage in the Parquet file, in * bytes. * @param length The number of bytes in the split (starting from the offset) * @param hostnames The hostnames of the nodes storing the split's file range. */ public DeltaSourceSplit(Map partitionValues, String id, Path filePath, long offset, long length, String... hostnames) { this(partitionValues, id, filePath, offset, length, hostnames, null); } /** * Constructs a split with host information and reader position restored from checkpoint. *

* The {@code hostnames} parameter provides information about the names of the hosts storing * this range of the file. Empty, if no host information is available. Host information is * typically only available on a specific file systems, like HDFS. * * @param partitionValues The Delta partition column to partition value map that should be used * for underlying Parquet File. * @param id The unique ID of this source split. * @param filePath The path to the Parquet file that this splits represents. * @param offset The start (inclusive) of the split's rage in the Parquet file, in * bytes. * @param length The number of bytes in the split (starting from the offset) * @param hostnames The hostnames of the nodes storing the split's file range. * @param readerPosition The reader position in bytes recovered from a checkpoint. */ public DeltaSourceSplit(Map partitionValues, String id, Path filePath, long offset, long length, String[] hostnames, CheckpointedPosition readerPosition) { super(id, filePath, offset, length, hostnames, readerPosition); // Make split Partition a new Copy of original map to for immutability. this.partitionValues = (partitionValues == null) ? Collections.emptyMap() : new HashMap<>(partitionValues); } @Override public DeltaSourceSplit updateWithCheckpointedPosition(CheckpointedPosition position) { return new DeltaSourceSplit(partitionValues, splitId(), path(), offset(), length(), hostnames(), position); } /** * @return an unmodifiable Map of Delta Table Partition columns and values. */ public Map getPartitionValues() { return Collections.unmodifiableMap(partitionValues); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy