
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceTask

/**
 * Copyright © 2016 Jeremy Custenborder ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jcustenborder.kafka.connect.spooldir;

import com.google.common.base.Joiner;
import com.opencsv.CSVParser;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.source.SourceRecord;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class SpoolDirCsvSourceTask extends SpoolDirSourceTask<SpoolDirCsvSourceConnectorConfig> {
  String[] fieldNames;
  private CSVParser csvParser;
  private CSVReader csvReader;
  private InputStreamReader streamReader;
  private Map<String, String> fileMetadata;

  @Override
  protected SpoolDirCsvSourceConnectorConfig config(Map<String, ?> settings) {
    return new SpoolDirCsvSourceConnectorConfig(true, settings);
  }

  @Override
  protected void configure(InputStream inputStream, Map<String, String> metadata, final Long lastOffset) throws IOException {
    log.trace("configure() - creating csvParser");
    this.csvParser = this.config.createCSVParserBuilder().build();
    this.streamReader = new InputStreamReader(inputStream, this.config.charset);
    CSVReaderBuilder csvReaderBuilder = this.config.createCSVReaderBuilder(this.streamReader, csvParser);
    this.csvReader = csvReaderBuilder.build();

    String[] fieldNames;

    // Field names come either from the file's header row or from the
    // configured value schema, in schema declaration order.
    if (this.config.firstRowAsHeader) {
      log.trace("configure() - Reading the header row.");
      fieldNames = this.csvReader.readNext();
      log.info("configure() - field names from header row. fields = {}", Joiner.on(", ").join(fieldNames));
    } else {
      log.trace("configure() - Using fields from schema {}", this.config.valueSchema.name());
      fieldNames = new String[this.config.valueSchema.fields().size()];
      int index = 0;
      for (Field field : this.config.valueSchema.fields()) {
        fieldNames[index++] = field.name();
      }
      log.info("configure() - field names from schema order. fields = {}", Joiner.on(", ").join(fieldNames));
    }

    // When resuming a partially processed file, skip rows that were already
    // delivered: the stored offset is the CSVReader's physical line count.
    if (null != lastOffset) {
      log.info("Found previous offset. Skipping {} line(s).", lastOffset.intValue());
      String[] row = null;
      while (null != (row = this.csvReader.readNext()) && this.csvReader.getLinesRead() < lastOffset) {
        log.trace("skipped row");
      }
    }

    this.fieldNames = fieldNames;
    this.fileMetadata = metadata;
  }

  @Override
  public void start(Map<String, String> settings) {
    super.start(settings);
  }

  @Override
  public long recordOffset() {
    return this.csvReader.getLinesRead();
  }

  @Override
  public List<SourceRecord> process() throws IOException {
    List<SourceRecord> records = new ArrayList<>(this.config.batchSize);

    // Read up to batchSize rows, converting each into key and value Structs.
    while (records.size() < this.config.batchSize) {
      String[] row = this.csvReader.readNext();
      if (row == null) {
        break;
      }
      log.trace("process() - Row on line {} has {} field(s)", recordOffset(), row.length);

      Struct keyStruct = new Struct(this.config.keySchema);
      Struct valueStruct = new Struct(this.config.valueSchema);

      for (int i = 0; i < this.fieldNames.length; i++) {
        String fieldName = this.fieldNames[i];
        log.trace("process() - Processing field {}", fieldName);
        String input = row[i];
        log.trace("process() - input = '{}'", input);
        Object fieldValue = null;

        try {
          Field field = this.config.valueSchema.field(fieldName);
          if (null != field) {
            fieldValue = this.parser.parseString(field.schema(), input);
            log.trace("process() - output = '{}'", fieldValue);
            valueStruct.put(field, fieldValue);
          } else {
            log.trace("process() - Field {} is not defined in the schema.", fieldName);
          }
        } catch (Exception ex) {
          String message = String.format("Exception thrown while parsing data for '%s'. linenumber=%s", fieldName, this.recordOffset());
          throw new DataException(message, ex);
        }

        // Any field that also appears in the key schema is copied to the key.
        Field keyField = this.config.keySchema.field(fieldName);
        if (null != keyField) {
          log.trace("process() - Setting key field '{}' to '{}'", keyField.name(), fieldValue);
          keyStruct.put(keyField, fieldValue);
        }
      }

      if (log.isInfoEnabled() && this.csvReader.getLinesRead() % ((long) this.config.batchSize * 20) == 0) {
        log.info("Processed {} lines of {}", this.csvReader.getLinesRead(), this.fileMetadata);
      }

      addRecord(records, keyStruct, valueStruct);
    }
    return records;
  }
}
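A note on the resume logic above: recordOffset() reports opencsv's CSVReader.getLinesRead(), so the offsets Kafka Connect commits are physical line numbers (the header line counts too), and configure() fast-forwards by reading and discarding rows until the line count catches up with the stored offset. The sketch below is a minimal, self-contained illustration of the same skip-to-offset pattern against an in-memory CSV; the class name CsvOffsetSkipDemo and the sample data are hypothetical and not part of the connector.

import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

import java.io.StringReader;

public class CsvOffsetSkipDemo {
  public static void main(String[] args) throws Exception {
    // Three data rows after a header row. Suppose a previous run committed
    // an offset of 3, i.e. getLinesRead() was 3 (header plus two records).
    String csv = "id,name\n1,alice\n2,bob\n3,carol\n";
    long lastOffset = 3;

    CSVParser parser = new CSVParserBuilder().withSeparator(',').build();
    CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
        .withCSVParser(parser)
        .build();

    // Read the header first; it advances getLinesRead() to 1, just as the
    // firstRowAsHeader branch of configure() does.
    String[] header = reader.readNext();
    System.out.println("fields = " + String.join(", ", header));

    // Same loop shape as the connector: consume rows until the physical line
    // count catches up with the stored offset. The row that makes
    // getLinesRead() equal to lastOffset was already delivered, so it is dropped.
    String[] row;
    while ((row = reader.readNext()) != null && reader.getLinesRead() < lastOffset) {
      // already delivered in a previous run; skip
    }

    // Everything from here on is new data ("3,carol" in this sample).
    while ((row = reader.readNext()) != null) {
      System.out.println("process line " + reader.getLinesRead() + ": " + String.join("|", row));
    }
  }
}

Because the offset is a line count rather than a record count, a quoted field with embedded newlines advances getLinesRead() by more than one; the skip loop stays consistent across restarts as long as the file is re-read with the same parser settings.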




