
// com.github.jcustenborder.kafka.connect.spooldir.SpoolDirSourceTask
// (source listing taken from the kafka-connect-spooldir artifact)
/**
* Copyright © 2016 Jeremy Custenborder ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import com.github.jcustenborder.kafka.connect.utils.data.Parser;
import com.github.jcustenborder.kafka.connect.utils.data.type.DateTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TimeTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TimestampTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TypeParser;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public abstract class SpoolDirSourceTask extends SourceTask {
// Class logger; package-private and shared by the static and instance methods below.
static final Logger log = LoggerFactory.getLogger(SpoolDirSourceTask.class);
// Created in start() and loaded with the timestamp, date and time type parsers.
protected Parser parser;
// Set by read() to {"fileName": <name of the current input file>}; used for the offset lookup
// and as the source partition of every record built by addRecord().
protected Map sourcePartition;
// Task configuration; assigned in start() from config(settings).
CONF config;
// Reset and restarted by read() each time a new file is opened; its elapsed time is logged when
// the file is moved to the finished directory.
Stopwatch processingTime = Stopwatch.createStarted();
// File currently being read; assigned by read() when a new file is selected.
private File inputFile;
// lastModified() of inputFile captured when it was selected; used as the record timestamp in
// FILE_TIME mode.
private long inputFileModifiedTime;
// Open stream over inputFile; null while no file is open (closeAndMoveToFinished() clears it).
private InputStream inputStream;
// True when the most recent process() call returned at least one record. While false, read()
// finishes the current file and looks for the next one.
private boolean hasRecords = false;
// Metadata handed to configure() and copied into the key/value metadata fields by addRecord();
// read() resets it to an empty immutable map for each new file.
private Map metadata;
/**
 * Verifies that a configured directory exists and that files can be created in it.
 *
 * <p>Writability is probed by creating a temporary file inside the directory and deleting it
 * again. Failing to delete the probe file is logged as a warning and does not fail the check.
 *
 * @param key           name of the configuration setting being validated; used in messages only
 * @param directoryPath directory to validate
 * @throws ConnectException if {@code directoryPath} is not a directory, or if a file cannot be
 *                          created in it
 */
private static void checkDirectory(String key, File directoryPath) {
  log.info("Checking if directory {} '{}' exists.", key, directoryPath);
  if (!directoryPath.isDirectory()) {
    throw new ConnectException(
        String.format("Directory for '%s' '%s' does not exist ", key, directoryPath),
        new FileNotFoundException(directoryPath.getAbsolutePath())
    );
  }

  log.info("Checking to ensure {} '{}' is writable ", key, directoryPath);
  final File temporaryFile;
  try {
    temporaryFile = File.createTempFile(".permission", ".testing", directoryPath);
  } catch (IOException ex) {
    throw new ConnectException(
        String.format("Directory for '%s' '%s' is not writable.", key, directoryPath),
        ex
    );
  }

  // Cleanup is best effort: the directory has already been proven writable, so a probe file
  // that cannot be removed is reported but must not fail start-up.
  try {
    if (temporaryFile.exists() && !temporaryFile.delete()) {
      log.warn("Unable to delete temp file {} in {}", temporaryFile, directoryPath);
    }
  } catch (SecurityException ex) {
    log.warn("Exception thrown while deleting {}.", temporaryFile, ex);
  }
}
/**
 * Builds the task configuration from the settings passed to {@code start(Map)}.
 *
 * @param settings settings handed to {@code start(Map)}
 * @return configuration stored in {@code this.config}
 */
protected abstract CONF config(Map settings);
/**
 * Prepares the subclass to read a newly opened input file. Called by {@code read()} right after
 * the file's stream has been opened.
 *
 * @param inputStream stream over the file that was just opened
 * @param metadata    metadata map for the file ({@code read()} passes an empty immutable map)
 * @param lastOffset  value stored under {@code "offset"} for this file name by the offset storage
 *                    reader, or {@code null} when no offset was found
 * @throws IOException if the stream cannot be prepared
 */
protected abstract void configure(InputStream inputStream, Map metadata, Long lastOffset) throws IOException;
/**
 * Produces the next batch of records from the current file. Called on every {@code read()}; an
 * empty list makes the following {@code read()} finish the current file and look for another.
 *
 * @return records read in this call; empty when nothing was read
 * @throws IOException if reading fails
 */
protected abstract List process() throws IOException;
/**
 * Reports the current position in the input. {@code addRecord} stores the value under
 * {@code "offset"} in each record's source offset, and {@code read()} logs it as the line being
 * processed when an error occurs.
 *
 * @return current offset within the input file
 */
protected abstract long recordOffset();
/**
 * Reports the version of this task.
 *
 * @return the version that {@code VersionUtil} resolves for the concrete task class
 */
@Override
public String version() {
  final Class<?> taskClass = this.getClass();
  return VersionUtil.version(taskClass);
}
/**
 * Initialises the task: builds the configuration, validates the working directories and
 * prepares the parser.
 *
 * <p>The input, finished and error directories must each exist and be writable. The parser is
 * then created and given type parsers for the Connect {@code Timestamp}, {@code Date} and
 * {@code Time} logical types, all built from the configured timezone and date formats.
 *
 * @param settings task settings; converted to the task configuration by {@code config(Map)}
 * @throws ConnectException if one of the directories is missing or not writable
 */
@Override
public void start(Map settings) {
  this.config = config(settings);

  checkDirectory(SpoolDirSourceConnectorConfig.INPUT_PATH_CONFIG, this.config.inputPath);
  checkDirectory(SpoolDirSourceConnectorConfig.FINISHED_PATH_CONFIG, this.config.finishedPath);
  checkDirectory(SpoolDirSourceConnectorConfig.ERROR_PATH_CONFIG, this.config.errorPath);

  this.parser = new Parser();
  // Register each parser directly (same order as before). Going through an untyped map would
  // hand registerTypeParser plain Objects instead of a Schema and a TypeParser.
  this.parser.registerTypeParser(
      Timestamp.SCHEMA,
      new TimestampTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats)
  );
  this.parser.registerTypeParser(
      Date.SCHEMA,
      new DateTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats)
  );
  this.parser.registerTypeParser(
      Time.SCHEMA,
      new TimeTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats)
  );
}
/**
 * Stops the task. This implementation performs no work.
 */
@Override
public void stop() {
// NOTE(review): the stream that read() stores in inputStream is not closed here, and the
// processing marker file is left in place — confirm whether a stop while a file is open needs
// cleanup, bearing in mind that stop() may be invoked from a different thread than poll().
}
/**
 * Returns the next batch of records, pausing for the configured empty-poll wait when the
 * batch is empty so that an idle input directory is not scanned in a tight loop.
 *
 * @return records produced by {@code read()}; possibly empty
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
@Override
public List poll() throws InterruptedException {
  log.trace("poll()");
  final List batch = read();

  if (!batch.isEmpty()) {
    log.trace("read() returning {} result(s)", batch.size());
    return batch;
  }

  log.trace("read() returned empty list. Sleeping {} ms.", this.config.emptyPollWaitMs);
  Thread.sleep(this.config.emptyPollWaitMs);
  log.trace("read() returning {} result(s)", batch.size());
  return batch;
}
/**
 * Closes the current input file, moves it into {@code outputDirectory} and removes its
 * processing marker file. Does nothing when no file is open.
 *
 * @param outputDirectory directory the input file is moved to
 * @param errored         {@code true} when the move is caused by a processing failure; only
 *                        affects which message is logged
 * @throws IOException if the stream cannot be closed or the file cannot be moved
 */
private void closeAndMoveToFinished(File outputDirectory, boolean errored) throws IOException {
  if (null == this.inputStream) {
    return;
  }

  log.info("Closing {}", this.inputFile);
  this.inputStream.close();
  this.inputStream = null;

  File finishedFile = new File(outputDirectory, this.inputFile.getName());
  if (errored) {
    log.error("Error during processing, moving {} to {}.", this.inputFile, outputDirectory);
  } else {
    log.info("Finished processing {} in {} second(s). Moving to {}.", this.inputFile, processingTime.elapsed(TimeUnit.SECONDS), outputDirectory);
  }
  Files.move(this.inputFile, finishedFile);

  File processingFile = processingFile(this.inputFile);
  if (processingFile.exists()) {
    log.info("Removing processing file {}", processingFile);
    // A marker that survives makes findNextInputFile() skip any later file with the same name,
    // so a failed delete must be visible in the log rather than silently ignored.
    if (!processingFile.delete()) {
      log.warn("Unable to remove processing file {}", processingFile);
    }
  }
}
/**
 * Derives the marker file that flags {@code input} as being processed: a sibling of
 * {@code input} whose name is the input's name followed by the configured processing extension.
 *
 * @param input input file
 * @return marker file located in the same directory as {@code input}
 */
File processingFile(File input) {
  final File directory = input.getParentFile();
  final String markerName = input.getName() + this.config.processingFileExtension;
  return new File(directory, markerName);
}
/**
 * Selects the next file to process from the input directory.
 *
 * <p>Files are examined in the order returned by {@link File#listFiles(java.io.FilenameFilter)}.
 * A file is skipped when its processing marker file exists, or when a minimum file age is
 * configured and the file was modified more recently than that. The first file that passes both
 * checks is returned.
 *
 * @return the next file to process, or {@code null} when no file is eligible
 */
File findNextInputFile() {
  final File[] candidates = this.config.inputPath.listFiles(this.config.inputFilenameFilter);
  if (null == candidates || candidates.length == 0) {
    log.debug("No files matching {} were found in {}", SpoolDirSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, this.config.inputPath);
    return null;
  }

  // Single pass: stop at the first eligible file instead of checking every marker up front.
  for (File candidate : candidates) {
    final File processingFile = processingFile(candidate);
    log.trace("Checking for processing file: {}", processingFile);
    if (processingFile.exists()) {
      log.debug("Skipping {} because processing file exists.", candidate);
      continue;
    }

    final long fileAgeMS = System.currentTimeMillis() - candidate.lastModified();
    if (fileAgeMS < 0L) {
      log.warn("File {} has a date in the future.", candidate);
    }
    if (this.config.minimumFileAgeMS > 0L && fileAgeMS < this.config.minimumFileAgeMS) {
      log.debug("Skipping {} because it does not meet the minimum age.", candidate);
      continue;
    }

    return candidate;
  }

  return null;
}
/**
 * Reads the next batch of records, opening the next input file when the current one is
 * exhausted.
 *
 * <p>When the previous call produced no records, the current file (if any) is closed and moved
 * to the finished directory, and the next eligible file is selected. A processing marker file is
 * created for it, the stored offset for its file name is looked up, the file is opened and the
 * subclass is configured. The subclass is then asked for records.
 *
 * <p>On any failure the current file is moved to the error directory. The task state is reset so
 * that the next call starts with a new file instead of reading from the closed one.
 *
 * @return the records read; empty when there is no file to process or when an error occurred and
 *         the task is not configured to halt on error
 * @throws ConnectException if processing fails and the task is configured to halt on error
 */
public List read() {
  try {
    if (!hasRecords) {
      closeAndMoveToFinished(this.config.finishedPath, false);

      File nextFile = findNextInputFile();
      if (null == nextFile) {
        return new ArrayList<>();
      }

      this.metadata = ImmutableMap.of();
      this.inputFile = nextFile;
      this.inputFileModifiedTime = this.inputFile.lastModified();
      File processingFile = processingFile(this.inputFile);
      Files.touch(processingFile);

      try {
        this.sourcePartition = ImmutableMap.of(
            "fileName", this.inputFile.getName()
        );
        log.info("Opening {}", this.inputFile);
        Long lastOffset = null;
        log.trace("looking up offset for {}", this.sourcePartition);
        Map offset = this.context.offsetStorageReader().offset(this.sourcePartition);
        if (null != offset && !offset.isEmpty()) {
          Number number = (Number) offset.get("offset");
          lastOffset = number.longValue();
        }
        this.inputStream = new FileInputStream(this.inputFile);
        configure(this.inputStream, this.metadata, lastOffset);
      } catch (Exception ex) {
        throw new ConnectException(ex);
      }
      processingTime.reset();
      processingTime.start();
    }

    List records = process();
    this.hasRecords = !records.isEmpty();
    return records;
  } catch (Exception ex) {
    // The file is about to be closed and moved away. Without this reset a failure that follows
    // a successful batch would leave hasRecords true, and every later call would invoke
    // process() on the closed file instead of moving on to the next one.
    this.hasRecords = false;

    log.error("Exception encountered processing line {} of {}.", recordOffset(), this.inputFile, ex);
    try {
      closeAndMoveToFinished(this.config.errorPath, true);
    } catch (IOException ex0) {
      log.error("Exception thrown while moving {} to {}", this.inputFile, this.config.errorPath, ex0);
    }

    if (this.config.haltOnError) {
      throw new ConnectException(ex);
    } else {
      return new ArrayList<>();
    }
  }
}
/**
 * Builds a {@code SourceRecord} from the given key and value and appends it to {@code records}.
 *
 * <p>The record's source offset is {@code {"offset": recordOffset()}}. When metadata fields are
 * configured, the current file metadata is written into the key and/or value struct. The record
 * timestamp depends on the configured timestamp mode: the value's timestamp field, the input
 * file's modification time, or none.
 *
 * @param records     list the new record is appended to
 * @param keyStruct   record key; may be {@code null}
 * @param valueStruct record value; may be {@code null} unless the timestamp mode reads the
 *                    timestamp from a value field
 * @throws ConnectException              if the timestamp is read from a value field and the
 *                                       value or that field is {@code null}
 * @throws UnsupportedOperationException if the configured timestamp mode is not handled
 */
protected void addRecord(List records, Struct keyStruct, Struct valueStruct) {
  Map sourceOffset = ImmutableMap.of(
      "offset",
      recordOffset()
  );
  log.trace("addRecord() - {}", sourceOffset);

  if (this.config.hasKeyMetadataField && null != keyStruct) {
    keyStruct.put(this.config.keyMetadataField, this.metadata);
  }
  if (this.config.hasvalueMetadataField && null != valueStruct) {
    valueStruct.put(this.config.valueMetadataField, this.metadata);
  }

  final Long timestamp;
  switch (this.config.timestampMode) {
    case FIELD:
      log.trace("addRecord() - Reading date from timestamp field '{}'", this.config.timestampField);
      // Fail with a message that names the field instead of a bare NullPointerException.
      if (null == valueStruct) {
        throw new ConnectException(
            String.format("Cannot read timestamp field '%s' because the record value is null.", this.config.timestampField)
        );
      }
      java.util.Date date = (java.util.Date) valueStruct.get(this.config.timestampField);
      if (null == date) {
        throw new ConnectException(
            String.format("Timestamp field '%s' is null.", this.config.timestampField)
        );
      }
      timestamp = date.getTime();
      break;
    case FILE_TIME:
      timestamp = this.inputFileModifiedTime;
      break;
    case PROCESS_TIME:
      // No explicit timestamp is set on the record in this mode.
      timestamp = null;
      break;
    default:
      throw new UnsupportedOperationException(
          String.format("Unsupported timestamp mode. %s", this.config.timestampMode)
      );
  }

  SourceRecord sourceRecord = new SourceRecord(
      this.sourcePartition,
      sourceOffset,
      this.config.topic,
      null,
      this.config.keySchema,
      keyStruct,
      this.config.valueSchema,
      valueStruct,
      timestamp
  );
  records.add(sourceRecord);
}
}