All downloads are free. Search and download functionality uses the official Maven repository.

com.github.jcustenborder.kafka.connect.vertica.VerticaSinkTask Maven / Gradle / Ivy

/**
 * Copyright © 2017 Jeremy Custenborder ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.jcustenborder.kafka.connect.vertica;

import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import com.github.jcustenborder.vertica.QueryBuilder;
import com.github.jcustenborder.vertica.VerticaColumnInfo;
import com.github.jcustenborder.vertica.VerticaColumnType;
import com.github.jcustenborder.vertica.VerticaStreamWriter;
import com.github.jcustenborder.vertica.VerticaStreamWriterBuilder;
import com.google.common.base.Stopwatch;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.vertica.jdbc.VerticaConnection;
import com.vertica.jdbc.VerticaCopyStream;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.io.IOException;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Kafka Connect sink task that loads records into Vertica through {@link VerticaCopyStream}.
 *
 * <p>Each call to {@link #put(Collection)} groups the incoming records by topic, treats the
 * topic name as the destination table name, streams the rows of every table through a pipe
 * into a COPY statement that is executed on a background thread, and finally commits the
 * connection. Any failure while loading is surfaced as a {@link RetriableException} so the
 * batch is not silently dropped.
 */
public class VerticaSinkTask extends SinkTask {
  private static final Logger log = LoggerFactory.getLogger(VerticaSinkTask.class);
  /** Extracts the leading alphabetic part of a data type string, e.g. "varchar" from "varchar(64)". */
  private static final Pattern PATTERN = Pattern.compile("^[a-z]+", Pattern.CASE_INSENSITIVE);
  /** Looks up the columns of a table (matched case-insensitively by name) in ordinal order. */
  private final static String SQL = "SELECT\n" +
      "  c.column_name,\n" +
      "  c.data_type,\n" +
      "  c.data_type_length,\n" +
      "  c.numeric_precision,\n" +
      "  c.numeric_scale\n" +
      "FROM columns c\n" +
      "  INNER JOIN tables t\n" +
      "    ON c.table_id = t.table_id\n" +
      "WHERE\n" +
      "  upper(t.table_name) = upper(?)\n" +
      "ORDER BY c.ordinal_position;";

  /** Runs the blocking COPY import while the calling thread feeds the pipe. Shut down in {@link #stop()}. */
  ExecutorService executorService = Executors.newSingleThreadExecutor();
  VerticaSinkConnectorConfig config;
  // NOTE(review): not referenced anywhere in this class; left raw because its element types
  // cannot be determined from this file.
  Map typeLookup;
  /** Per-table writer builders, cached so the column metadata query is not repeated on every batch. */
  Cache<String, VerticaStreamWriterBuilder> builderCache;


  /**
   * @return the version string reported by {@link VersionUtil} for this class.
   */
  @Override
  public String version() {
    return VersionUtil.version(this.getClass());
  }

  /**
   * Parses the task configuration and creates the builder cache.
   *
   * @param settings task configuration handed over by the Connect framework.
   */
  @Override
  public void start(Map<String, String> settings) {
    this.config = new VerticaSinkConnectorConfig(settings);
    this.builderCache = CacheBuilder.newBuilder()
        .expireAfterWrite(this.config.streamBuilderCacheMs, TimeUnit.MILLISECONDS)
        .build();
  }

  /**
   * Returns the (cached) stream writer builder for a table, creating it from the table's
   * column metadata on a cache miss.
   *
   * @param connection connection used to query the column metadata on a cache miss.
   * @param tableName  name of the destination table.
   * @return builder configured with compression, load method, table and columns.
   * @throws ExecutionException if the cache loader fails with a checked exception
   *                            (for example a {@link SQLException}).
   */
  VerticaStreamWriterBuilder configureBuilder(Connection connection, final String tableName) throws ExecutionException {
    return this.builderCache.get(tableName, () -> {
      VerticaStreamWriterBuilder builder = new VerticaStreamWriterBuilder();
      builder.compressionType(this.config.compressionType);
      builder.loadMethod(this.config.loadMethod);
      builder.table(tableName);
      log.trace("configureBuilder() - Finding columns for '{}'", tableName);
      try (PreparedStatement statement = connection.prepareStatement(SQL)) {
        statement.setString(1, tableName);
        try (ResultSet results = statement.executeQuery()) {
          while (results.next()) {
            String columnName = results.getString(1);
            String dataType = results.getString(2);

            Matcher matcher = PATTERN.matcher(dataType);

            if (!matcher.find()) {
              // Columns whose type does not start with letters are left out of the builder.
              log.warn("Could not match '{}' for column '{}'", dataType, columnName);
              continue;
            }

            // Locale.ROOT keeps the enum lookup stable regardless of the JVM default locale
            // (e.g. Turkish would upper-case "int" to a dotted capital I).
            String stype = matcher.group(0).toUpperCase(java.util.Locale.ROOT);

            VerticaColumnType type = VerticaColumnType.valueOf(stype);
            log.info("configureBuilder() - '{}' = {}", columnName, type);
            builder.column(columnName, type);
          }
        }
      }
      return builder;
    });
  }


  /**
   * Writes a batch of records to Vertica, one COPY stream per destination table, and commits.
   *
   * @param records records delivered by the Connect framework; the topic of each record is
   *                used as the table name and the value is expected to be a {@link Struct}.
   * @throws RetriableException if obtaining the connection, streaming, the import itself or
   *                            the commit fails, or if the import exceeds the configured timeout.
   */
  @Override
  public void put(Collection<SinkRecord> records) {
    if (records == null || records.isEmpty()) {
      // Nothing to write; avoid taking a pooled connection for an empty batch.
      return;
    }

    Multimap<String, SinkRecord> recordsByTable = HashMultimap.create(this.config.expectedTopics, this.config.expectedRecords);
    Multiset<String> countsByTable = HashMultiset.create(this.config.expectedTopics);

    for (SinkRecord record : records) {
      String table = record.topic();
      countsByTable.add(table);
      recordsByTable.put(table, record);
    }

    for (String table : countsByTable.elementSet()) {
      log.trace("put() - Writing {} record(s) to {}", countsByTable.count(table), table);
    }

    DataSource dataSource = PoolOfPools.get(this.config);

    try (Connection connection = dataSource.getConnection()) {
      VerticaConnection verticaConnection = connection.unwrap(VerticaConnection.class);

      try {
        // keySet() yields each table once; keys() would repeat a table once per record and
        // load the same rows multiple times.
        for (final String tableName : recordsByTable.keySet()) {
          log.trace("put() - Processing records for table '{}'", tableName);
          loadTable(connection, verticaConnection, tableName, recordsByTable.get(tableName));
        }
      } catch (IOException | ExecutionException ex) {
        throw new RetriableException(ex);
      }

      log.trace("put() - committing transaction");
      connection.commit();
    } catch (SQLException ex) {
      throw new RetriableException(ex);
    }
  }

  /**
   * Streams the records of a single table into a Vertica COPY stream and waits for the import.
   *
   * @param connection        connection used to roll back when the import times out.
   * @param verticaConnection the same connection unwrapped to the Vertica driver type.
   * @param tableName         destination table.
   * @param tableRecords      records to write to the table.
   * @throws SQLException       if starting the copy stream or rolling back fails.
   * @throws IOException        if writing to or closing the pipe fails.
   * @throws ExecutionException if building the writer configuration or the background import fails.
   * @throws RetriableException if the import times out or the calling thread is interrupted.
   */
  private void loadTable(Connection connection, VerticaConnection verticaConnection, String tableName,
                         Collection<SinkRecord> tableRecords)
      throws SQLException, IOException, ExecutionException {
    VerticaStreamWriterBuilder builder = configureBuilder(verticaConnection, tableName);

    final String statement = new QueryBuilder(builder).toString();

    log.info("put() - Creating VerticaCopyStream with statement:\n{}", statement);
    // NOTE(review): copyStream.finish() is never called before the commit; confirm against
    // the Vertica JDBC documentation whether it is required to end the COPY.
    final VerticaCopyStream copyStream = new VerticaCopyStream(verticaConnection, statement);
    copyStream.start();

    final PipedInputStream inputStream = new PipedInputStream(this.config.inputBufferSize);
    final PipedOutputStream outputStream = new PipedOutputStream(inputStream);

    Future<?> importFuture = null;
    try {
      Stopwatch stopwatch = Stopwatch.createStarted();

      // The import reads from the pipe on the executor thread while this thread writes to it.
      importFuture = executorService.submit(() -> {
        try {
          copyStream.addStream(inputStream);
          copyStream.execute();
        } catch (SQLException e) {
          throw new IllegalStateException(e);
        }
      });

      int count = 0;
      try (VerticaStreamWriter writer = builder.build(outputStream)) {
        for (SinkRecord record : tableRecords) {
          Struct value = (Struct) record.value();
          int i = 0;
          Object[] values = new Object[writer.columns().size()];
          for (VerticaColumnInfo columnInfo : writer.columns()) {
            values[i] = value.get(columnInfo.name());
            i++;
          }
          log.trace("Writing row");
          writer.write(values);
          count++;
        }
        log.info("Wrote {} record(s) to stream", count);
      }
      // Closing the write side signals end-of-stream to the import thread.
      outputStream.close();

      log.info("Waiting for import to complete.");

      try {
        importFuture.get(this.config.streamTimeoutMs, TimeUnit.MILLISECONDS);
      } catch (TimeoutException e) {
        log.warn("Import exceeded timeout of {} ms. Rolling back", this.config.streamTimeoutMs);
        importFuture.cancel(true);
        connection.rollback();
        // Fail the batch so it is retried instead of reporting success and committing.
        throw new RetriableException(e);
      } catch (InterruptedException e) {
        // Preserve the interrupt status for the caller and fail the batch.
        Thread.currentThread().interrupt();
        throw new RetriableException(e);
      }

      log.info("put() - Imported {} record(s) in {} millisecond(s).", count, stopwatch.elapsed(TimeUnit.MILLISECONDS));
      final List<Long> rejects = copyStream.getRejects();
      if (!rejects.isEmpty()) {
        log.warn("put() - Rejected {} record(s).", rejects.size());
        for (Long l : rejects) {
          log.warn("Rejected row {}", l);
        }
      }
    } finally {
      // On any failure path make sure a still-running import does not block the
      // single-threaded executor for subsequent batches.
      if (importFuture != null && !importFuture.isDone()) {
        importFuture.cancel(true);
      }
      inputStream.close();
    }
  }

  /**
   * No-op: this class keeps no buffered records between calls, because
   * {@link #put(Collection)} commits before it returns.
   *
   * @param map offsets supplied by the framework; unused.
   */
  @Override
  public void flush(Map<TopicPartition, OffsetAndMetadata> map) {

  }

  /**
   * Stops the background import thread so it does not outlive the task.
   */
  @Override
  public void stop() {
    executorService.shutdownNow();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy