/*
 * Copyright (c) 2017 Couchbase, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.couchbase.connect.kafka;

import com.couchbase.client.core.error.DocumentNotFoundException;
import com.couchbase.client.core.logging.LogRedaction;
import com.couchbase.client.java.Collection;
import com.couchbase.client.java.codec.RawJsonTranscoder;
import com.couchbase.client.java.kv.RemoveOptions;
import com.couchbase.client.java.kv.UpsertOptions;
import com.couchbase.connect.kafka.config.sink.CouchbaseSinkConfig;
import com.couchbase.connect.kafka.sink.DocumentMode;
import com.couchbase.connect.kafka.sink.N1qlMode;
import com.couchbase.connect.kafka.sink.N1qlWriter;
import com.couchbase.connect.kafka.sink.SubDocumentMode;
import com.couchbase.connect.kafka.sink.SubDocumentWriter;
import com.couchbase.connect.kafka.util.DocumentIdExtractor;
import com.couchbase.connect.kafka.util.DocumentPathExtractor;
import com.couchbase.connect.kafka.util.DurabilitySetter;
import com.couchbase.connect.kafka.util.JsonBinaryDocument;
import com.couchbase.connect.kafka.util.ScopeAndCollection;
import com.couchbase.connect.kafka.util.TopicMap;
import com.couchbase.connect.kafka.util.Version;
import com.couchbase.connect.kafka.util.config.ConfigHelper;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.couchbase.client.core.util.CbCollections.mapOf;
import static com.couchbase.client.java.kv.RemoveOptions.removeOptions;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
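
/**
 * Kafka Connect sink task that writes records from Kafka topics into Couchbase.
 * <p>
 * Depending on the configured {@link DocumentMode}, each record is written as a
 * full JSON document, merged into part of a document via the sub-document API,
 * or applied through a N1QL statement. Records are routed to a per-topic
 * collection when one is configured, otherwise to the default collection.
 * Records with a null value are treated as deletions, and records in a batch
 * that share a document ID are deduplicated so only the latest version is written.
 */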
public class CouchbaseSinkTask extends SinkTask {
  private static final Logger LOGGER = LoggerFactory.getLogger(CouchbaseSinkTask.class);

  private String bucketName;
  private ScopeAndCollection defaultDestCollection;
  private Map<String, ScopeAndCollection> topicToCollection;
  private KafkaCouchbaseClient client;
  private JsonConverter converter;
  private DocumentIdExtractor documentIdExtractor;
  private DocumentMode documentMode;
  private SubDocumentWriter subDocumentWriter;
  private N1qlWriter n1qlWriter;
  private DurabilitySetter durabilitySetter;
  private Duration documentExpiry;

  @Override
  public String version() {
    return Version.getVersion();
  }
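
  // Parses the connector configuration, connects to Couchbase, and sets up the
  // JSON converter, optional document ID extractor, and the writer for the
  // configured document mode (sub-document or N1QL; plain upserts need no writer).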
  @Override
  public void start(Map<String, String> properties) {
    CouchbaseSinkConfig config;
    try {
      config = ConfigHelper.parse(CouchbaseSinkConfig.class, properties);
    } catch (ConfigException e) {
      throw new ConnectException("Couldn't start CouchbaseSinkTask due to configuration error", e);
    }
    LogRedaction.setRedactionLevel(config.logRedaction());

    client = new KafkaCouchbaseClient(config);
    bucketName = config.bucket();
    defaultDestCollection = ScopeAndCollection.parse(config.defaultCollection());
    topicToCollection = TopicMap.parse(config.topicToCollection());

    converter = new JsonConverter();
    converter.configure(mapOf("schemas.enable", false), false);

    String docIdPointer = config.documentId();
    if (docIdPointer != null && !docIdPointer.isEmpty()) {
      documentIdExtractor = new DocumentIdExtractor(docIdPointer, config.removeDocumentId());
    }

    documentMode = config.documentMode();
    durabilitySetter = DurabilitySetter.create(config);
    documentExpiry = config.documentExpiration();

    switch (documentMode) {
      case SUBDOCUMENT: {
        SubDocumentMode subDocumentMode = config.subdocumentOperation();
        String path = config.subdocumentPath();
        boolean createPaths = config.subdocumentCreatePath();
        boolean createDocuments = config.createDocument();
        subDocumentWriter = new SubDocumentWriter(subDocumentMode, path, createPaths, createDocuments, documentExpiry);
        break;
      }
      case N1QL: {
        N1qlMode n1qlMode = config.n1qlOperation();
        boolean createDocuments = config.createDocument();
        List<String> n1qlWhereFields = config.n1qlWhereFields();
        n1qlWriter = new N1qlWriter(n1qlMode, n1qlWhereFields, createDocuments);
        break;
      }
    }
  }
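
  /**
   * Pairs a Kafka record with the Couchbase document converted from it.
   * A null document indicates the record's value was null (a tombstone),
   * meaning the corresponding document should be deleted.
   */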
  private static class SinkRecordAndDocument {
    private final SinkRecord sinkRecord;
    private final JsonBinaryDocument document;

    public SinkRecordAndDocument(SinkRecord sinkRecord, JsonBinaryDocument document) {
      this.sinkRecord = requireNonNull(sinkRecord);
      this.document = document; // nullable
    }
  }
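
  // Deduplicates the batch by document ID, then writes each document to its
  // destination collection using the configured document mode. Records with a
  // null value are treated as deletions. Blocks until the whole batch is written.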
  @Override
  public void put(java.util.Collection<SinkRecord> records) {
    if (records.isEmpty()) {
      return;
    }

    final SinkRecord first = records.iterator().next();
    final int recordsCount = records.size();
    LOGGER.trace("Received {} records. First record kafka coordinates:({}-{}-{}). Writing them to Couchbase...",
        recordsCount, first.topic(), first.kafkaPartition(), first.kafkaOffset());

    Map<String, SinkRecordAndDocument> idToDocumentOrNull = toJsonBinaryDocuments(records);

    Flux.fromIterable(idToDocumentOrNull.entrySet())
        .flatMap(entry -> {
          SinkRecordAndDocument sinkRecordAndDocument = entry.getValue();
          SinkRecord sinkRecord = sinkRecordAndDocument.sinkRecord;
          JsonBinaryDocument doc = sinkRecordAndDocument.document;

          ScopeAndCollection destCollectionSpec = topicToCollection.getOrDefault(sinkRecord.topic(), defaultDestCollection);
          Collection destCollection = client.collection(destCollectionSpec);

          if (doc == null) {
            return removeIfExists(destCollection, entry.getKey());
          }

          switch (documentMode) {
            case N1QL: {
              return n1qlWriter.write(client.cluster(), bucketName, doc);
            }
            case SUBDOCUMENT: {
              return subDocumentWriter.write(destCollection.reactive(), doc, durabilitySetter);
            }
            default: {
              UpsertOptions options = UpsertOptions.upsertOptions()
                  .expiry(documentExpiry)
                  .transcoder(RawJsonTranscoder.INSTANCE);
              durabilitySetter.accept(options);
              return destCollection.reactive()
                  .upsert(doc.id(), doc.content(), options)
                  .then();
            }
          }
        }).blockLast();
  }

  /**
   * Converts Kafka records to documents and indexes them by document ID.
   * <p>
   * If there are duplicate document IDs, ignores all but the last. This
   * prevents a stale version of the document from "winning" by being the
   * last one written to Couchbase.
   *
   * @return a map where the key is the ID of a document, and the value pairs the
   * source record with the converted document. A null document indicates the
   * document should be deleted.
   */
  private Map<String, SinkRecordAndDocument> toJsonBinaryDocuments(java.util.Collection<SinkRecord> records) {
    Map<String, SinkRecordAndDocument> idToSourceRecordAndDocument = new HashMap<>();

    for (SinkRecord record : records) {
      if (record.value() == null) {
        String documentId = documentIdFromKafkaMetadata(record);
        idToSourceRecordAndDocument.put(documentId, new SinkRecordAndDocument(record, null));
        continue;
      }

      JsonBinaryDocument doc = convert(record);
      idToSourceRecordAndDocument.put(doc.id(), new SinkRecordAndDocument(record, doc));
    }

    int deduplicatedRecords = records.size() - idToSourceRecordAndDocument.size();
    if (deduplicatedRecords != 0) {
      LOGGER.debug("Batch contained {} redundant Kafka records.", deduplicatedRecords);
    }

    return idToSourceRecordAndDocument;
  }
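
  // Deletes the document if it exists; a missing document is treated as success
  // so that tombstones for already-deleted documents don't fail the batch.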
  private Mono<Void> removeIfExists(Collection collection, String documentId) {
    RemoveOptions options = removeOptions();
    durabilitySetter.accept(options);
    return collection.reactive()
        .remove(documentId, options)
        .onErrorResume(DocumentNotFoundException.class, throwable -> Mono.empty())
        .then();
  }
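
  // Decodes the buffer's remaining bytes as UTF-8 without advancing its position.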
  private static String toString(ByteBuffer byteBuffer) {
    final ByteBuffer sliced = byteBuffer.slice();
    byte[] bytes = new byte[sliced.remaining()];
    sliced.get(bytes);
    return new String(bytes, UTF_8);
  }
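
  // Derives a document ID from the record key, or falls back to the record's
  // topic/partition/offset coordinates when the key is absent or of an unsupported type.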
  private static String documentIdFromKafkaMetadata(SinkRecord record) {
    Object key = record.key();

    if (key instanceof String
        || key instanceof Number
        || key instanceof Boolean) {
      return key.toString();
    }

    if (key instanceof byte[]) {
      return new String((byte[]) key, UTF_8);
    }

    if (key instanceof ByteBuffer) {
      return toString((ByteBuffer) key);
    }

    return record.topic() + "/" + record.kafkaPartition() + "/" + record.kafkaOffset();
  }
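
  // Converts the record value to raw JSON bytes and determines the document ID,
  // preferring the configured ID extractor and falling back to Kafka metadata.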
  private JsonBinaryDocument convert(SinkRecord record) {
    byte[] valueAsJsonBytes = converter.fromConnectData(record.topic(), record.valueSchema(), record.value());
    String defaultId = null;

    try {
      if (documentIdExtractor != null) {
        return documentIdExtractor.extractDocumentId(valueAsJsonBytes);
      }
    } catch (DocumentPathExtractor.DocumentPathNotFoundException e) {
      defaultId = documentIdFromKafkaMetadata(record);
      LOGGER.warn(e.getMessage() + "; using fallback ID '{}'", defaultId);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    if (defaultId == null) {
      defaultId = documentIdFromKafkaMetadata(record);
    }

    return new JsonBinaryDocument(defaultId, valueAsJsonBytes);
  }
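
  // No-op: put() blocks until every document in the batch has been written,
  // so there is nothing left to flush when the framework commits offsets.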
  @Override
  public void flush(Map<TopicPartition, OffsetAndMetadata> offsets) {
  }

  // Releases the Couchbase client and its underlying resources.
  @Override
  public void stop() {
    client.close();
  }
}