All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.inlong.sort.base.dirty.sink.s3.S3DirtySinkFactory Maven / Gradle / Ivy

There is a newer version: 1.12.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.inlong.sort.base.dirty.sink.s3;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.ValidationException;
import org.apache.flink.table.factories.DynamicTableFactory.Context;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.inlong.sort.base.dirty.sink.DirtySink;
import org.apache.inlong.sort.base.dirty.sink.DirtySinkFactory;

import java.util.HashSet;
import java.util.Set;
import static org.apache.inlong.sort.base.Constants.DIRTY_IDENTIFIER;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_BATCH_BYTES;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_BATCH_INTERVAL;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_BATCH_SIZE;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_FIELD_DELIMITER;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_FORMAT;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_IGNORE_ERRORS;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_LINE_DELIMITER;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_LOG_ENABLE;
import static org.apache.inlong.sort.base.Constants.DIRTY_SIDE_OUTPUT_RETRIES;

/**
 * S3 dirty sink factory
 */
public class S3DirtySinkFactory implements DirtySinkFactory {

    private static final String IDENTIFIER = "s3";

    private static final ConfigOption DIRTY_SIDE_OUTPUT_ENDPOINT =
            ConfigOptions.key("dirty.side-output.s3.endpoint")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The endpoint of s3");
    private static final ConfigOption DIRTY_SIDE_OUTPUT_REGION =
            ConfigOptions.key("dirty.side-output.s3.region")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The region of s3");
    private static final ConfigOption DIRTY_SIDE_OUTPUT_BUCKET =
            ConfigOptions.key("dirty.side-output.s3.bucket")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The bucket of s3");
    private static final ConfigOption DIRTY_SIDE_OUTPUT_KEY =
            ConfigOptions.key("dirty.side-output.s3.key")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The key of s3");
    private static final ConfigOption DIRTY_SIDE_OUTPUT_ACCESS_KEY_ID =
            ConfigOptions.key("dirty.side-output.s3.access-key-id")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The access key of s3");
    private static final ConfigOption DIRTY_SIDE_OUTPUT_SECRET_KEY_ID =
            ConfigOptions.key("dirty.side-output.s3.secret-key-id")
                    .stringType()
                    .noDefaultValue()
                    .withDescription("The secret key of s3");

    @Override
    public  DirtySink createDirtySink(Context context) {
        ReadableConfig config = Configuration.fromMap(context.getCatalogTable().getOptions());
        FactoryUtil.validateFactoryOptions(this, config);
        validate(config);
        return new S3DirtySink<>(getS3Options(config),
                context.getCatalogTable().getResolvedSchema().toPhysicalRowDataType());
    }

    private void validate(ReadableConfig config) {
        String identifier = config.getOptional(DIRTY_IDENTIFIER).orElse(null);
        if (identifier == null || identifier.trim().length() == 0) {
            throw new ValidationException(
                    "The option 'dirty.identifier' is not allowed to be empty.");
        }
    }

    private S3Options getS3Options(ReadableConfig config) {
        final S3Options.Builder builder = S3Options.builder()
                .setEndpoint(config.getOptional(DIRTY_SIDE_OUTPUT_ENDPOINT).orElse(null))
                .setRegion(config.getOptional(DIRTY_SIDE_OUTPUT_REGION).orElse(null))
                .setBucket(config.getOptional(DIRTY_SIDE_OUTPUT_BUCKET).orElse(null))
                .setKey(config.getOptional(DIRTY_SIDE_OUTPUT_KEY).orElse(null))
                .setBatchSize(config.get(DIRTY_SIDE_OUTPUT_BATCH_SIZE))
                .setMaxRetries(config.get(DIRTY_SIDE_OUTPUT_RETRIES))
                .setBatchIntervalMs(config.get(DIRTY_SIDE_OUTPUT_BATCH_INTERVAL))
                .setMaxBatchBytes(config.get(DIRTY_SIDE_OUTPUT_BATCH_BYTES))
                .setFormat(config.get(DIRTY_SIDE_OUTPUT_FORMAT))
                .setIgnoreSideOutputErrors(config.get(DIRTY_SIDE_OUTPUT_IGNORE_ERRORS))
                .setEnableDirtyLog(config.get(DIRTY_SIDE_OUTPUT_LOG_ENABLE))
                .setFieldDelimiter(config.get(DIRTY_SIDE_OUTPUT_FIELD_DELIMITER))
                .setLineDelimiter(config.get(DIRTY_SIDE_OUTPUT_LINE_DELIMITER))
                .setAccessKeyId(config.getOptional(DIRTY_SIDE_OUTPUT_ACCESS_KEY_ID).orElse(null))
                .setSecretKeyId(config.getOptional(DIRTY_SIDE_OUTPUT_SECRET_KEY_ID).orElse(null));
        return builder.build();
    }

    @Override
    public String factoryIdentifier() {
        return IDENTIFIER;
    }

    @Override
    public Set> requiredOptions() {
        final Set> options = new HashSet<>();
        options.add(DIRTY_SIDE_OUTPUT_ENDPOINT);
        options.add(DIRTY_SIDE_OUTPUT_REGION);
        options.add(DIRTY_SIDE_OUTPUT_BUCKET);
        options.add(DIRTY_SIDE_OUTPUT_KEY);
        options.add(DIRTY_IDENTIFIER);
        return options;
    }

    @Override
    public Set> optionalOptions() {
        final Set> options = new HashSet<>();
        options.add(DIRTY_SIDE_OUTPUT_BATCH_SIZE);
        options.add(DIRTY_SIDE_OUTPUT_RETRIES);
        options.add(DIRTY_SIDE_OUTPUT_BATCH_INTERVAL);
        options.add(DIRTY_SIDE_OUTPUT_BATCH_BYTES);
        options.add(DIRTY_SIDE_OUTPUT_FORMAT);
        options.add(DIRTY_SIDE_OUTPUT_IGNORE_ERRORS);
        options.add(DIRTY_SIDE_OUTPUT_LOG_ENABLE);
        options.add(DIRTY_SIDE_OUTPUT_FIELD_DELIMITER);
        options.add(DIRTY_SIDE_OUTPUT_LINE_DELIMITER);
        options.add(DIRTY_SIDE_OUTPUT_ACCESS_KEY_ID);
        options.add(DIRTY_SIDE_OUTPUT_SECRET_KEY_ID);
        return options;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy