All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pulsar.io.file.FileSourceConfig Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.io.file;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import lombok.Data;
import lombok.experimental.Accessors;
import org.apache.commons.lang3.StringUtils;

/**
 * Configuration class for the File Source Connector.
 */
@Data
@Accessors(chain = true)
public class FileSourceConfig implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The input directory from which to pull files.
     */
    private String inputDirectory;

    /**
     * Indicates whether or not to pull files from sub-directories.
     */
    private Boolean recurse;

    /**
     * If true, the file is not deleted after it has been processed and
     * causes the file to be picked up continually.
     */
    private Boolean keepFile = Boolean.FALSE;

    /**
     * Only files whose names match the given regular expression will be picked up.
     */
    private String fileFilter = "[^.].*";

    /**
     * When 'recurse' property is true, then only sub-directories whose
     * path matches the given regular expression will be scanned.
     */
    private String pathFilter;

    /**
     * The minimum age that a file must be in order to be processed; any file younger
     * than this amount of time (according to last modification date) will be ignored.
     */
    private Integer minimumFileAge;

    /**
     * The maximum age that a file must be in order to be processed; any file older
     * than this amount of time (according to last modification date) will be ignored.
     */
    private Long maximumFileAge;

    /**
     * The minimum size (in bytes) that a file must be in order to be processed.
     */
    private Integer minimumSize;

    /**
     * The maximum size (in bytes) that a file can be in order to be processed.
     */
    private Double maximumSize;

    /**
     * Indicates whether or not hidden files should be ignored or not.
     */
    private Boolean ignoreHiddenFiles;

    /**
     * Indicates how long to wait before performing a directory listing.
     */
    private Long pollingInterval;

    /**
     * The number of worker threads that will be processing the files.
     * This allows you to process a larger number of files concurrently.
     * However, setting this to a value greater than 1 will result in the data
     * from multiple files being "intermingled" in the target topic.
     */
    private Integer numWorkers = 1;

    /**
     * If set, do not delete but only rename file that has been processed.
     * This config only work when 'keepFile' property is false.
     */
    private String processedFileSuffix;

    public static FileSourceConfig load(String yamlFile) throws IOException {
        ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
        return mapper.readValue(new File(yamlFile), FileSourceConfig.class);
    }

    public static FileSourceConfig load(Map map) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(mapper.writeValueAsString(map), FileSourceConfig.class);
    }

    public void validate() {
        if (StringUtils.isBlank(inputDirectory)) {
            throw new IllegalArgumentException("Required property not set.");
        } else if (Files.notExists(Paths.get(inputDirectory), LinkOption.NOFOLLOW_LINKS)) {
            throw new IllegalArgumentException("Specified input directory does not exist");
        } else if (!Files.isReadable(Paths.get(inputDirectory))) {
            throw new IllegalArgumentException("Specified input directory is not readable");
        } else if (!Optional.ofNullable(keepFile).orElse(false) && !Files.isWritable(Paths.get(inputDirectory))) {
            throw new IllegalArgumentException("You have requested the consumed files to be deleted, but the "
                    + "source directory is not writeable.");
        }

        if (StringUtils.isNotBlank(fileFilter)) {
            try {
                Pattern.compile(fileFilter);
            } catch (final PatternSyntaxException psEx) {
                throw new IllegalArgumentException("Invalid Regex pattern provided for fileFilter");
            }
        }

        if (minimumFileAge != null &&  Math.signum(minimumFileAge) < 0) {
            throw new IllegalArgumentException("The property minimumFileAge must be non-negative");
        }

        if (maximumFileAge != null && Math.signum(maximumFileAge) < 0) {
            throw new IllegalArgumentException("The property maximumFileAge must be non-negative");
        }

        if (minimumSize != null && Math.signum(minimumSize) < 0) {
            throw new IllegalArgumentException("The property minimumSize must be non-negative");
        }

        if (maximumSize != null && Math.signum(maximumSize) < 0) {
            throw new IllegalArgumentException("The property maximumSize must be non-negative");
        }

        if (pollingInterval != null && pollingInterval <= 0) {
            throw new IllegalArgumentException("The property pollingInterval must be greater than zero");
        }

        if (numWorkers != null && numWorkers <= 0) {
            throw new IllegalArgumentException("The property numWorkers must be greater than zero");
        }

        if (processedFileSuffix != null && keepFile) {
            throw new IllegalArgumentException(
                    "The property keepFile must be false if the property processedFileSuffix is set");
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy