org.apache.inlong.sort.filesystem.FileSystemTableSink

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.inlong.sort.filesystem;

import org.apache.inlong.sort.base.dirty.DirtyOptions;
import org.apache.inlong.sort.base.dirty.sink.DirtySink;
import org.apache.inlong.sort.filesystem.stream.StreamingSink;

import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.serialization.BulkWriter;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.Encoder;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner;
import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
import org.apache.flink.streaming.api.functions.sink.filesystem.PartFileInfo;
import org.apache.flink.streaming.api.functions.sink.filesystem.RollingPolicy;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink.BucketsBuilder;
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.CheckpointRollingPolicy;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.ValidationException;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.format.DecodingFormat;
import org.apache.flink.table.connector.format.EncodingFormat;
import org.apache.flink.table.connector.sink.DataStreamSinkProvider;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.abilities.SupportsOverwrite;
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.expressions.ResolvedExpression;
import org.apache.flink.table.factories.DynamicTableFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.factories.FileSystemFormatFactory;
import org.apache.flink.table.filesystem.DeserializationSchemaAdapter;
import org.apache.flink.table.filesystem.EmptyMetaStoreFactory;
import org.apache.flink.table.filesystem.FileSystemFactory;
import org.apache.flink.table.filesystem.FileSystemOptions;
import org.apache.flink.table.filesystem.FileSystemOutputFormat;
import org.apache.flink.table.filesystem.OutputFormatFactory;
import org.apache.flink.table.filesystem.PartitionComputer;
import org.apache.flink.table.filesystem.RowDataPartitionComputer;
import org.apache.flink.table.filesystem.SerializationSchemaAdapter;
import org.apache.flink.table.filesystem.stream.PartitionCommitInfo;
import org.apache.flink.table.filesystem.stream.compact.CompactBulkReader;
import org.apache.flink.table.filesystem.stream.compact.CompactReader;
import org.apache.flink.table.filesystem.stream.compact.FileInputFormatCompactReader;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.utils.PartitionPathUtils;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static org.apache.flink.table.filesystem.FileSystemOptions.SINK_ROLLING_POLICY_CHECK_INTERVAL;
import static org.apache.flink.table.filesystem.FileSystemOptions.SINK_ROLLING_POLICY_FILE_SIZE;
import static org.apache.flink.table.filesystem.FileSystemOptions.SINK_ROLLING_POLICY_ROLLOVER_INTERVAL;
import static org.apache.flink.table.filesystem.stream.compact.CompactOperator.convertToUncompacted;
import static org.apache.inlong.sort.base.Constants.IGNORE_ALL_CHANGELOG;
import static org.apache.inlong.sort.base.Constants.INLONG_AUDIT;
import static org.apache.inlong.sort.base.Constants.INLONG_METRIC;

/**
 * File system {@link DynamicTableSink}, with InLong Sort metric/audit reporting and
 * dirty-data handling wired into the streaming write path.
 */
public class FileSystemTableSink extends AbstractFileSystemTable
        implements
            DynamicTableSink,
            SupportsPartitioning,
            SupportsOverwrite {

    // For compaction reading
    @Nullable
    private final DecodingFormat<BulkFormat<RowData, FileSourceSplit>> bulkReaderFormat;
    @Nullable
    private final DecodingFormat<DeserializationSchema<RowData>> deserializationFormat;
    @Nullable
    private final FileSystemFormatFactory formatFactory;

    // For Writing
    @Nullable
    private final EncodingFormat<BulkWriter.Factory<RowData>> bulkWriterFormat;
    @Nullable
    private final EncodingFormat<SerializationSchema<RowData>> serializationFormat;

    private boolean overwrite = false;
    private boolean dynamicGrouping = false;
    private LinkedHashMap<String, String> staticPartitions = new LinkedHashMap<>();

    @Nullable
    private final Integer configuredParallelism;

    private final String inlongMetric;
    private final String inlongAudit;
    private final DirtyOptions dirtyOptions;
    private @Nullable final DirtySink<Object> dirtySink;

    FileSystemTableSink(
            DynamicTableFactory.Context context,
            @Nullable DecodingFormat<BulkFormat<RowData, FileSourceSplit>> bulkReaderFormat,
            @Nullable DecodingFormat<DeserializationSchema<RowData>> deserializationFormat,
            @Nullable FileSystemFormatFactory formatFactory,
            @Nullable EncodingFormat<BulkWriter.Factory<RowData>> bulkWriterFormat,
            @Nullable EncodingFormat<SerializationSchema<RowData>> serializationFormat,
            DirtyOptions dirtyOptions,
            @Nullable DirtySink<Object> dirtySink) {
        super(context);
        this.bulkReaderFormat = bulkReaderFormat;
        this.deserializationFormat = deserializationFormat;
        this.formatFactory = formatFactory;
        if (Stream.of(bulkWriterFormat, serializationFormat, formatFactory)
                .allMatch(Objects::isNull)) {
            Configuration options = Configuration.fromMap(context.getCatalogTable().getOptions());
            String identifier = options.get(FactoryUtil.FORMAT);
            throw new ValidationException(
                    String.format(
                            "Could not find any format factory for identifier '%s' in the classpath.",
                            identifier));
        }
        this.bulkWriterFormat = bulkWriterFormat;
        this.serializationFormat = serializationFormat;
        this.configuredParallelism = tableOptions.get(FileSystemOptions.SINK_PARALLELISM);
        this.inlongMetric = tableOptions.get(INLONG_METRIC);
        this.inlongAudit = tableOptions.get(INLONG_AUDIT);
        this.dirtyOptions = dirtyOptions;
        this.dirtySink = dirtySink;
    }

    @Override
    public SinkRuntimeProvider getSinkRuntimeProvider(Context sinkContext) {
        return (DataStreamSinkProvider) dataStream -> consume(dataStream, sinkContext);
    }

    private DataStreamSink<?> consume(DataStream<RowData> dataStream, Context sinkContext) {
        final int inputParallelism = dataStream.getParallelism();
        final int parallelism = Optional.ofNullable(configuredParallelism).orElse(inputParallelism);

        if (sinkContext.isBounded()) {
            return createBatchSink(dataStream, sinkContext, parallelism);
        } else {
            if (overwrite) {
                throw new IllegalStateException("Streaming mode not support overwrite.");
            }

            return createStreamingSink(dataStream, sinkContext, parallelism);
        }
    }

    private RowDataPartitionComputer partitionComputer() {
        return new RowDataPartitionComputer(
                defaultPartName,
                schema.getFieldNames(),
                schema.getFieldDataTypes(),
                partitionKeys.toArray(new String[0]));
    }

    private DataStreamSink<RowData> createBatchSink(
            DataStream<RowData> inputStream, Context sinkContext, final int parallelism) {
        FileSystemOutputFormat.Builder<RowData> builder = new FileSystemOutputFormat.Builder<>();
        builder.setPartitionComputer(partitionComputer());
        builder.setDynamicGrouped(dynamicGrouping);
        builder.setPartitionColumns(partitionKeys.toArray(new String[0]));
        builder.setFormatFactory(createOutputFormatFactory(sinkContext));
        builder.setMetaStoreFactory(new EmptyMetaStoreFactory(path));
        builder.setOverwrite(overwrite);
        builder.setStaticPartitions(staticPartitions);
        builder.setTempPath(toStagingPath());
        builder.setOutputFileConfig(
                OutputFileConfig.builder()
                        .withPartPrefix("part-" + UUID.randomUUID().toString())
                        .build());
        return inputStream
                .writeUsingOutputFormat(builder.build())
                .setParallelism(parallelism)
                .name("Filesystem");
    }
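
    // Note on the batch path above (a summary, slightly simplified): rows are written
    // through a FileSystemOutputFormat into the temporary staging directory created by
    // toStagingPath(); when the bounded job completes, the staged files are committed
    // into the target location, applying the overwrite flag and any static partitions.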

    private DataStreamSink<?> createStreamingSink(
            DataStream<RowData> dataStream, Context sinkContext, final int parallelism) {
        FileSystemFactory fsFactory = FileSystem::get;
        RowDataPartitionComputer computer = partitionComputer();

        boolean autoCompaction = tableOptions.getBoolean(FileSystemOptions.AUTO_COMPACTION);
        Object writer = createWriter(sinkContext);
        boolean isEncoder = writer instanceof Encoder;
        TableBucketAssigner assigner = new TableBucketAssigner(computer);
        TableRollingPolicy rollingPolicy =
                new TableRollingPolicy(
                        !isEncoder || autoCompaction,
                        tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE).getBytes(),
                        tableOptions.get(SINK_ROLLING_POLICY_ROLLOVER_INTERVAL).toMillis());

        String randomPrefix = "part-" + UUID.randomUUID().toString();
        OutputFileConfig.OutputFileConfigBuilder fileNamingBuilder = OutputFileConfig.builder();
        fileNamingBuilder =
                autoCompaction
                        ? fileNamingBuilder.withPartPrefix(convertToUncompacted(randomPrefix))
                        : fileNamingBuilder.withPartPrefix(randomPrefix);
        OutputFileConfig fileNamingConfig = fileNamingBuilder.build();

        BucketsBuilder<RowData, String, ? extends BucketsBuilder<RowData, String, ?>> bucketsBuilder;
        if (isEncoder) {
            // noinspection unchecked
            bucketsBuilder =
                    StreamingFileSink.forRowFormat(
                            path,
                            new ProjectionEncoder((Encoder<RowData>) writer, computer))
                            .withBucketAssigner(assigner)
                            .withOutputFileConfig(fileNamingConfig)
                            .withRollingPolicy(rollingPolicy);
        } else {
            // noinspection unchecked
            bucketsBuilder =
                    StreamingFileSink.forBulkFormat(
                            path,
                            new ProjectionBulkFactory(
                                    (BulkWriter.Factory<RowData>) writer, computer))
                            .withBucketAssigner(assigner)
                            .withOutputFileConfig(fileNamingConfig)
                            .withRollingPolicy(rollingPolicy);
        }

        long bucketCheckInterval = tableOptions.get(SINK_ROLLING_POLICY_CHECK_INTERVAL).toMillis();

        DataStream<PartitionCommitInfo> writerStream;
        if (autoCompaction) {
            long compactionSize =
                    tableOptions
                            .getOptional(FileSystemOptions.COMPACTION_FILE_SIZE)
                            .orElse(tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE))
                            .getBytes();

            CompactReader.Factory<RowData> reader =
                    createCompactReaderFactory(sinkContext)
                            .orElseThrow(
                                    () -> new TableException(
                                            "Please implement available reader for compaction:"
                                                    + " BulkFormat, FileInputFormat."));

            writerStream =
                    StreamingSink.compactionWriter(
                            dataStream,
                            bucketCheckInterval,
                            bucketsBuilder,
                            fsFactory,
                            path,
                            reader,
                            compactionSize,
                            parallelism,
                            inlongMetric,
                            inlongAudit,
                            dirtyOptions,
                            dirtySink);
        } else {
            writerStream =
                    StreamingSink.writer(
                            dataStream, bucketCheckInterval, bucketsBuilder, parallelism,
                            inlongMetric, inlongAudit, dirtyOptions, dirtySink);
        }

        return StreamingSink.sink(
                writerStream,
                path,
                tableIdentifier,
                partitionKeys,
                new EmptyMetaStoreFactory(path),
                fsFactory,
                tableOptions);
    }
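
    // Note on the streaming path above (a summary): the bucket writer (row- or
    // bulk-encoded) rolls part files according to TableRollingPolicy; with
    // auto-compaction enabled, the writer produces "uncompacted" files that a compact
    // stage merges using the reader from createCompactReaderFactory(); finally,
    // StreamingSink.sink() attaches the partition committer. The InLong metric/audit
    // labels and the dirty-data sink are passed to the writer stage only.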

    private Optional<CompactReader.Factory<RowData>> createCompactReaderFactory(Context context) {
        DataType producedDataType = schema.toRowDataType();
        if (bulkReaderFormat != null) {
            BulkFormat<RowData, FileSourceSplit> format =
                    bulkReaderFormat.createRuntimeDecoder(
                            createSourceContext(context), producedDataType);
            return Optional.of(CompactBulkReader.factory(format));
        } else if (formatFactory != null) {
            InputFormat<RowData, ?> format = formatFactory.createReader(createReaderContext());
            if (format instanceof FileInputFormat) {
                // noinspection unchecked
                return Optional.of(
                        FileInputFormatCompactReader.factory((FileInputFormat<RowData>) format));
            }
        } else if (deserializationFormat != null) {
            // NOTE: we need to pass the full format types to deserializationFormat
            DeserializationSchema<RowData> decoder =
                    deserializationFormat.createRuntimeDecoder(
                            createSourceContext(context), getFormatDataType());
            int[] projectedFields = IntStream.range(0, schema.getFieldCount()).toArray();
            DeserializationSchemaAdapter format =
                    new DeserializationSchemaAdapter(
                            decoder, schema, projectedFields, partitionKeys, defaultPartName);
            return Optional.of(CompactBulkReader.factory(format));
        }
        return Optional.empty();
    }
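
    // Note: the compaction reader above is chosen in priority order -- a BulkFormat
    // decoder first, then a FileInputFormat produced by the format factory, then a
    // DeserializationSchema adapter; if none is available, createStreamingSink() fails
    // with the TableException asking for a usable reader.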

    private DynamicTableSource.Context createSourceContext(Context context) {
        return new DynamicTableSource.Context() {

            @Override
            public <T> TypeInformation<T> createTypeInformation(DataType producedDataType) {
                return context.createTypeInformation(producedDataType);
            }

            @Override
            public DynamicTableSource.DataStructureConverter createDataStructureConverter(
                    DataType producedDataType) {
                throw new TableException("Compaction reader not support DataStructure converter.");
            }
        };
    }

    private FileSystemFormatFactory.ReaderContext createReaderContext() {
        return new FileSystemFormatFactory.ReaderContext() {

            @Override
            public TableSchema getSchema() {
                return schema;
            }

            @Override
            public ReadableConfig getFormatOptions() {
                return formatOptions(formatFactory.factoryIdentifier());
            }

            @Override
            public List<String> getPartitionKeys() {
                return partitionKeys;
            }

            @Override
            public String getDefaultPartName() {
                return defaultPartName;
            }

            @Override
            public Path[] getPaths() {
                return new Path[]{path};
            }

            @Override
            public int[] getProjectFields() {
                return IntStream.range(0, schema.getFieldCount()).toArray();
            }

            @Override
            public long getPushedDownLimit() {
                return Long.MAX_VALUE;
            }

            @Override
            public List<ResolvedExpression> getPushedDownFilters() {
                return Collections.emptyList();
            }
        };
    }

    private Path toStagingPath() {
        Path stagingDir = new Path(path, ".staging_" + System.currentTimeMillis());
        try {
            FileSystem fs = stagingDir.getFileSystem();
            Preconditions.checkState(
                    fs.exists(stagingDir) || fs.mkdirs(stagingDir),
                    "Failed to create staging dir " + stagingDir);
            return stagingDir;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @SuppressWarnings("unchecked")
    private OutputFormatFactory<RowData> createOutputFormatFactory(Context sinkContext) {
        Object writer = createWriter(sinkContext);
        return writer instanceof Encoder
                ? path -> createEncoderOutputFormat((Encoder<RowData>) writer, path)
                : path -> createBulkWriterOutputFormat((BulkWriter.Factory<RowData>) writer, path);
    }

    private Object createWriter(Context sinkContext) {
        if (bulkWriterFormat != null) {
            return bulkWriterFormat.createRuntimeEncoder(sinkContext, getFormatDataType());
        } else if (serializationFormat != null) {
            return new SerializationSchemaAdapter(
                    serializationFormat.createRuntimeEncoder(sinkContext, getFormatDataType()));
        } else {
            throw new TableException("Can not find format factory.");
        }
    }

    private void checkConfiguredParallelismAllowed(ChangelogMode requestChangelogMode) {
        final Integer parallelism = this.configuredParallelism;
        if (parallelism == null) {
            return;
        }
        if (!requestChangelogMode.containsOnly(RowKind.INSERT)) {
            throw new ValidationException(
                    String.format(
                            "Currently, filesystem sink doesn't support setting parallelism (%d) by '%s' "
                                    + "when the input stream is not INSERT only. The row kinds of input stream are "
                                    + "[%s]",
                            parallelism,
                            FileSystemOptions.SINK_PARALLELISM.key(),
                            requestChangelogMode.getContainedKinds().stream()
                                    .map(RowKind::shortString)
                                    .collect(Collectors.joining(","))));
        }
    }

    private static OutputFormat<RowData> createBulkWriterOutputFormat(
            BulkWriter.Factory<RowData> factory, Path path) {
        return new OutputFormat<RowData>() {

            private static final long serialVersionUID = 1L;

            private transient BulkWriter<RowData> writer;

            @Override
            public void configure(Configuration parameters) {
            }

            @Override
            public void open(int taskNumber, int numTasks) throws IOException {
                this.writer =
                        factory.create(
                                path.getFileSystem().create(path, FileSystem.WriteMode.OVERWRITE));
            }

            @Override
            public void writeRecord(RowData record) throws IOException {
                writer.addElement(record);
            }

            @Override
            public void close() throws IOException {
                writer.flush();
                writer.finish();
            }
        };
    }

    private static OutputFormat<RowData> createEncoderOutputFormat(
            Encoder<RowData> encoder, Path path) {
        return new OutputFormat<RowData>() {

            private static final long serialVersionUID = 1L;

            private transient FSDataOutputStream output;

            @Override
            public void configure(Configuration parameters) {
            }

            @Override
            public void open(int taskNumber, int numTasks) throws IOException {
                this.output = path.getFileSystem().create(path, FileSystem.WriteMode.OVERWRITE);
            }

            @Override
            public void writeRecord(RowData record) throws IOException {
                encoder.encode(record, output);
            }

            @Override
            public void close() throws IOException {
                this.output.flush();
                this.output.close();
            }
        };
    }

    private LinkedHashMap<String, String> toPartialLinkedPartSpec(Map<String, String> part) {
        LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
        for (String partitionKey : partitionKeys) {
            if (part.containsKey(partitionKey)) {
                partSpec.put(partitionKey, part.get(partitionKey));
            }
        }
        return partSpec;
    }

    @Override
    public boolean requiresPartitionGrouping(boolean supportsGrouping) {
        this.dynamicGrouping = supportsGrouping;
        return dynamicGrouping;
    }

    @Override
    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
        checkConfiguredParallelismAllowed(requestedMode);
        boolean ignoreChangelog = tableOptions.get(IGNORE_ALL_CHANGELOG);
        if (ignoreChangelog) {
            return ChangelogMode.all();
        }
        if (bulkWriterFormat != null) {
            return bulkWriterFormat.getChangelogMode();
        } else if (serializationFormat != null) {
            return serializationFormat.getChangelogMode();
        } else {
            throw new TableException("Can not find format factory.");
        }
    }
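
    // Note: when the InLong "ignore all changelog" option is enabled, the sink accepts
    // every RowKind and simply writes the rows as regular records; otherwise the
    // changelog mode is delegated to the configured format, and a configured sink
    // parallelism is only permitted for INSERT-only input
    // (see checkConfiguredParallelismAllowed).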

    @Override
    public DynamicTableSink copy() {
        FileSystemTableSink sink =
                new FileSystemTableSink(
                        context,
                        bulkReaderFormat,
                        deserializationFormat,
                        formatFactory,
                        bulkWriterFormat,
                        serializationFormat,
                        dirtyOptions,
                        dirtySink);
        sink.overwrite = overwrite;
        sink.dynamicGrouping = dynamicGrouping;
        sink.staticPartitions = staticPartitions;
        return sink;
    }

    @Override
    public String asSummaryString() {
        return "Filesystem";
    }

    @Override
    public void applyOverwrite(boolean overwrite) {
        this.overwrite = overwrite;
    }

    @Override
    public void applyStaticPartition(Map<String, String> partition) {
        this.staticPartitions = toPartialLinkedPartSpec(partition);
    }

    /**
     * Table bucket assigner, wrap {@link PartitionComputer}.
     */
    public static class TableBucketAssigner implements BucketAssigner<RowData, String> {

        private final PartitionComputer<RowData> computer;

        public TableBucketAssigner(PartitionComputer<RowData> computer) {
            this.computer = computer;
        }

        @Override
        public String getBucketId(RowData element, Context context) {
            try {
                return PartitionPathUtils.generatePartitionPath(
                        computer.generatePartValues(element));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public SimpleVersionedSerializer<String> getSerializer() {
            return SimpleVersionedStringSerializer.INSTANCE;
        }
    }
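
    // Illustrative sketch of the assigner above (hypothetical field and partition
    // names, not part of this class): for a table partitioned by "dt", the bucket id
    // is the generated partition path, e.g.
    //
    //     RowDataPartitionComputer computer = new RowDataPartitionComputer(
    //             "__DEFAULT_PARTITION__",                       // default partition name
    //             new String[]{"id", "dt"},                      // all field names
    //             new DataType[]{DataTypes.INT(), DataTypes.STRING()},
    //             new String[]{"dt"});                           // partition keys
    //     TableBucketAssigner assigner = new TableBucketAssigner(computer);
    //     // assigner.getBucketId(row, context) returns something like "dt=2024-01-01/"
    //     // via PartitionPathUtils.generatePartitionPath(...)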

    /**
     * Table {@link RollingPolicy}, it extends {@link CheckpointRollingPolicy} for bulk writers.
     */
    public static class TableRollingPolicy extends CheckpointRollingPolicy<RowData, String> {

        private final boolean rollOnCheckpoint;
        private final long rollingFileSize;
        private final long rollingTimeInterval;

        public TableRollingPolicy(
                boolean rollOnCheckpoint, long rollingFileSize, long rollingTimeInterval) {
            this.rollOnCheckpoint = rollOnCheckpoint;
            Preconditions.checkArgument(rollingFileSize > 0L);
            Preconditions.checkArgument(rollingTimeInterval > 0L);
            this.rollingFileSize = rollingFileSize;
            this.rollingTimeInterval = rollingTimeInterval;
        }

        @Override
        public boolean shouldRollOnCheckpoint(PartFileInfo<String> partFileState) {
            try {
                return rollOnCheckpoint || partFileState.getSize() > rollingFileSize;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public boolean shouldRollOnEvent(PartFileInfo<String> partFileState, RowData element)
                throws IOException {
            return partFileState.getSize() > rollingFileSize;
        }

        @Override
        public boolean shouldRollOnProcessingTime(
                PartFileInfo<String> partFileState, long currentTime) {
            return currentTime - partFileState.getCreationTime() >= rollingTimeInterval;
        }
    }
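
    // Note: the policy above rolls a part file on checkpoint when rollOnCheckpoint is
    // set (bulk formats or auto-compaction) or the file already exceeds the size
    // threshold, on each record once the size threshold is crossed, and on processing
    // time once the file is older than the rollover interval; the thresholds come from
    // the SINK_ROLLING_POLICY_* options read in createStreamingSink().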

    private static class ProjectionEncoder implements Encoder<RowData> {

        private final Encoder<RowData> encoder;
        private final RowDataPartitionComputer computer;

        private ProjectionEncoder(Encoder<RowData> encoder, RowDataPartitionComputer computer) {
            this.encoder = encoder;
            this.computer = computer;
        }

        @Override
        public void encode(RowData element, OutputStream stream) throws IOException {
            encoder.encode(computer.projectColumnsToWrite(element), stream);
        }
    }

    /**
     * Project row to non-partition fields.
     */
    public static class ProjectionBulkFactory implements BulkWriter.Factory<RowData> {

        private final BulkWriter.Factory<RowData> factory;
        private final RowDataPartitionComputer computer;

        public ProjectionBulkFactory(
                BulkWriter.Factory<RowData> factory, RowDataPartitionComputer computer) {
            this.factory = factory;
            this.computer = computer;
        }

        @Override
        public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
            BulkWriter<RowData> writer = factory.create(out);
            return new BulkWriter<RowData>() {

                @Override
                public void addElement(RowData element) throws IOException {
                    writer.addElement(computer.projectColumnsToWrite(element));
                }

                @Override
                public void flush() throws IOException {
                    writer.flush();
                }

                @Override
                public void finish() throws IOException {
                    writer.finish();
                }
            };
        }
    }
}