
io.cdap.plugin.batch.source.FTPBatchSource Maven / Gradle / Ivy
/*
* Copyright © 2016-2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.plugin.batch.source;
import com.google.common.reflect.TypeToken;
import com.google.gson.Gson;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.batch.BatchSource;
import io.cdap.cdap.etl.api.batch.BatchSourceContext;
import io.cdap.plugin.format.FileFormat;
import io.cdap.plugin.format.plugin.AbstractFileSource;
import io.cdap.plugin.format.plugin.FileSourceProperties;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.lang.reflect.Type;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* {@link BatchSource} that reads from an FTP or SFTP server.
*/
@Plugin(type = "batchsource")
@Name("FTP")
@Description("Batch source for an FTP or SFTP source. Prefix of the path ('ftp://...' or 'sftp://...') determines " +
"the source server type, either FTP or SFTP.")
public class FTPBatchSource extends AbstractFileSource {
// Property name "fileSystemProperties"; it is not referenced in the visible part of this file.
// NOTE(review): presumably the config property name used when reporting validation failures - confirm.
private static final String NAME_FILE_SYSTEM_PROPERTIES = "fileSystemProperties";
// File system property key that getFileSystemProperties() fills in when it is not already present.
private static final String FS_SFTP_IMPL = "fs.sftp.impl";
// Value stored under FS_SFTP_IMPL by default: the fully qualified name of the SFTP file system class.
private static final String SFTP_FS_CLASS = "org.apache.hadoop.fs.sftp.SFTPFileSystem";
/**
* Record schema named "text" with two fields: a long "offset" and a string "body".
*/
public static final Schema SCHEMA = Schema.recordOf("text",
Schema.Field.of("offset", Schema.of(Schema.Type.LONG)),
Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
// Plugin configuration; the constructor also hands the same object to the superclass.
private final FTPBatchSourceConfig config;
/**
* Creates the source from its plugin configuration.
*
* @param config configuration passed to the superclass constructor and also retained by this instance
*/
public FTPBatchSource(FTPBatchSourceConfig config) {
super(config);
this.config = config;
}
/**
* Builds the file system properties used to read from the server.
*
* <p>Starts from a copy of the properties supplied by the plugin config, registers the default SFTP
* file system implementation unless one is already present, and sets the minimum split size to
* {@code Long.MAX_VALUE}.
*
* @param context batch source context; not used by this implementation
* @return a new mutable map holding the resulting properties
*/
@Override
protected Map<String, String> getFileSystemProperties(BatchSourceContext context) {
  // Work on a copy so the map returned by the config is not modified.
  Map<String, String> properties = new HashMap<>(config.getFileSystemProperties());
  // Keep an SFTP implementation that is already configured; only fall back to the default when absent.
  if (!properties.containsKey(FS_SFTP_IMPL)) {
    properties.put(FS_SFTP_IMPL, SFTP_FS_CLASS);
  }
  // Limit the number of splits to 1 since FTPInputStream does not support seek.
  properties.put(FileInputFormat.SPLIT_MINSIZE, Long.toString(Long.MAX_VALUE));
  return properties;
}
/**
* Config class that contains all the properties needed for FTP Batch Source.
*/
@SuppressWarnings("unused")
public static class FTPBatchSourceConfig extends PluginConfig implements FileSourceProperties {
private static final Gson GSON = new Gson();
private static final Type MAP_STRING_STRING_TYPE = new TypeToken
© 2015 - 2025 Weber Informatics LLC | Privacy Policy