
/**
* Copyright © 2017 Jeremy Custenborder ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.vertica;
import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import com.github.jcustenborder.vertica.QueryBuilder;
import com.github.jcustenborder.vertica.VerticaColumnInfo;
import com.github.jcustenborder.vertica.VerticaColumnType;
import com.github.jcustenborder.vertica.VerticaStreamWriter;
import com.github.jcustenborder.vertica.VerticaStreamWriterBuilder;
import com.google.common.base.Stopwatch;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.vertica.jdbc.VerticaConnection;
import com.vertica.jdbc.VerticaCopyStream;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
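
/**
 * SinkTask that writes Kafka Connect records to Vertica. Records are grouped by topic,
 * each topic is treated as a Vertica table of the same name, and rows are loaded through
 * Vertica's COPY stream interface rather than individual INSERT statements.
 */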
public class VerticaSinkTask extends SinkTask {
  private static final Logger log = LoggerFactory.getLogger(VerticaSinkTask.class);
  private static final Pattern PATTERN = Pattern.compile("^[a-z]+", Pattern.CASE_INSENSITIVE);
  private final static String SQL = "SELECT\n" +
      " c.column_name,\n" +
      " c.data_type,\n" +
      " c.data_type_length,\n" +
      " c.numeric_precision,\n" +
      " c.numeric_scale\n" +
      "FROM columns c\n" +
      " INNER JOIN tables t\n" +
      "   ON c.table_id = t.table_id\n" +
      "WHERE\n" +
      " upper(t.table_name) = upper(?)\n" +
      "ORDER BY c.ordinal_position;";

  ExecutorService executorService = Executors.newSingleThreadExecutor();
  VerticaSinkConnectorConfig config;
  Map typeLookup;
  Cache<String, VerticaStreamWriterBuilder> builderCache;

  @Override
  public String version() {
    return VersionUtil.version(this.getClass());
  }
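
  // start() loads the connector configuration and initializes the builder cache.
  // Cached VerticaStreamWriterBuilder entries expire streamBuilderCacheMs milliseconds
  // after they are written, so table metadata is re-read from Vertica periodically.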
  @Override
  public void start(Map<String, String> settings) {
    this.config = new VerticaSinkConnectorConfig(settings);
    this.builderCache = CacheBuilder.newBuilder()
        .expireAfterWrite(this.config.streamBuilderCacheMs, TimeUnit.MILLISECONDS)
        .build();
  }
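
  /**
   * Returns a cached VerticaStreamWriterBuilder for the given table, creating one on the
   * first request by querying the table's column names and data types (the SQL above) and
   * mapping each column's data type onto a VerticaColumnType.
   */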
  VerticaStreamWriterBuilder configureBuilder(Connection connection, final String tableName) throws ExecutionException {
    return this.builderCache.get(tableName, () -> {
      VerticaStreamWriterBuilder builder = new VerticaStreamWriterBuilder();
      builder.compressionType(this.config.compressionType);
      builder.loadMethod(this.config.loadMethod);
      builder.table(tableName);
      log.trace("configureBuilder() - Finding columns for '{}'", tableName);
      List<String> fields = new ArrayList<>();

      try (PreparedStatement statement = connection.prepareStatement(SQL)) {
        statement.setString(1, tableName);

        try (ResultSet results = statement.executeQuery()) {
          while (results.next()) {
            String columnName = results.getString(1);
            fields.add(columnName);
            String dataType = results.getString(2);
            // Types such as "varchar(255)" are reduced to their leading keyword before
            // being mapped to a VerticaColumnType constant.
            Matcher matcher = PATTERN.matcher(dataType);

            if (!matcher.find()) {
              log.warn("Could not match '{}' for column '{}'", dataType, columnName);
              continue;
            }

            String stype = matcher.group(0).toUpperCase();
            VerticaColumnType type = VerticaColumnType.valueOf(stype);
            log.info("configureBuilder() - '{}' = {}", columnName, type);
            builder.column(columnName, type);
          }
        }
      }

      return builder;
    });
  }
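
  /**
   * Groups the incoming records by topic (the topic name doubles as the target table name)
   * and streams each group into Vertica with a COPY statement. The batch is committed at
   * the end of the method, or rolled back if the copy does not finish within streamTimeoutMs.
   */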
  @Override
  public void put(Collection<SinkRecord> records) {
    Multimap<String, SinkRecord> recordsByTable = HashMultimap.create(this.config.expectedTopics, this.config.expectedRecords);
    Multiset<String> countsByTable = HashMultiset.create(this.config.expectedTopics);

    for (SinkRecord record : records) {
      String table = record.topic();
      countsByTable.add(table);
      recordsByTable.put(table, record);
    }

    for (String table : countsByTable.elementSet()) {
      log.trace("put() - Writing {} record(s) to {}", countsByTable.count(table), table);
    }

    DataSource dataSource = PoolOfPools.get(this.config);

    try (Connection connection = dataSource.getConnection()) {
      VerticaConnection verticaConnection = connection.unwrap(VerticaConnection.class);

      try {
        for (final String tableName : recordsByTable.keySet()) {
          log.trace("put() - Processing records for table '{}'", tableName);
          Collection<SinkRecord> tableRecords = recordsByTable.get(tableName);
          VerticaStreamWriterBuilder builder = configureBuilder(verticaConnection, tableName);
          final String statement = new QueryBuilder(builder).toString();
          log.info("put() - Creating VerticaCopyStream with statement:\n{}", statement);
          VerticaCopyStream copyStream = new VerticaCopyStream(verticaConnection, statement);
          copyStream.start();

          // The writer produces the COPY payload on this thread while the copy stream
          // consumes it on the executor thread, connected by a pair of piped streams.
          final PipedInputStream inputStream = new PipedInputStream(this.config.inputBufferSize);
          final PipedOutputStream outputStream = new PipedOutputStream(inputStream);

          try {
            Stopwatch stopwatch = Stopwatch.createStarted();
            Future<?> importFuture = executorService.submit(() -> {
              try {
                copyStream.addStream(inputStream);
                copyStream.execute();
              } catch (SQLException e) {
                throw new IllegalStateException(e);
              }
            });

            int count = 0;
            try (VerticaStreamWriter writer = builder.build(outputStream)) {
              for (SinkRecord record : tableRecords) {
                Struct value = (Struct) record.value();
                int i = 0;
                Object[] values = new Object[writer.columns().size()];
                for (VerticaColumnInfo columnInfo : writer.columns()) {
                  values[i] = value.get(columnInfo.name());
                  i++;
                }
                log.trace("Writing row");
                writer.write(values);
                count++;
              }
              log.info("Wrote {} record(s) to stream", count);
            }

            // Closing the output stream signals end of input to the copy stream.
            outputStream.close();

            log.info("Waiting for import to complete.");
            try {
              importFuture.get(this.config.streamTimeoutMs, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
              log.warn("Import exceeded timeout of {} ms. Rolling back", this.config.streamTimeoutMs);
              connection.rollback();
            }

            log.info("put() - Imported {} record(s) in {} millisecond(s).", count, stopwatch.elapsed(TimeUnit.MILLISECONDS));
            final int rejectedCount = copyStream.getRejects().size();
            if (rejectedCount > 0) {
              log.warn("put() - Rejected {} record(s).", rejectedCount);
              for (Long l : copyStream.getRejects()) {
                log.warn("Rejected row {}", l);
              }
            }
          } catch (InterruptedException | ExecutionException e) {
            log.error("Exception thrown", e);
          } finally {
            inputStream.close();
          }
        }
      } catch (IOException ex) {
        throw new RetriableException(ex);
      } catch (ExecutionException ex) {
        throw new RetriableException(ex);
      }

      log.trace("put() - committing transaction");
      connection.commit();
    } catch (SQLException ex) {
      throw new RetriableException(ex);
    }
  }
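
  // flush() and stop() are no-ops: each put() call commits its own transaction,
  // so there is no buffered state left to flush or release here.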
  @Override
  public void flush(Map<TopicPartition, OffsetAndMetadata> map) {

  }

  @Override
  public void stop() {

  }
}