/*
 * Copyright 2008-present MongoDB, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mongodb.kafka.connect.source;

import static com.mongodb.kafka.connect.source.MongoSourceConfig.BATCH_SIZE_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.COLLECTION_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.CONNECTION_URI_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.COPY_EXISTING_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.DATABASE_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.HEARTBEAT_INTERVAL_MS_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.HEARTBEAT_TOPIC_NAME_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.POLL_AWAIT_TIME_MS_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.POLL_MAX_BATCH_SIZE_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.PROVIDER_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.PUBLISH_FULL_DOCUMENT_ONLY_CONFIG;
import static com.mongodb.kafka.connect.source.heartbeat.HeartbeatManager.HEARTBEAT_KEY;
import static com.mongodb.kafka.connect.source.producer.SchemaAndValueProducers.createKeySchemaAndValueProvider;
import static com.mongodb.kafka.connect.source.producer.SchemaAndValueProducers.createValueSchemaAndValueProvider;
import static com.mongodb.kafka.connect.util.ConfigHelper.getMongoDriverInformation;
import static java.lang.String.format;
import static java.util.Collections.singletonMap;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;

import org.apache.kafka.common.utils.SystemTime;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.bson.BsonDocument;
import org.bson.BsonDocumentWrapper;
import org.bson.Document;
import org.bson.RawBsonDocument;

import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings;
import com.mongodb.MongoCommandException;
import com.mongodb.client.ChangeStreamIterable;
import com.mongodb.client.MongoChangeStreamCursor;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.changestream.ChangeStreamDocument;

import com.mongodb.kafka.connect.Versions;
import com.mongodb.kafka.connect.source.MongoSourceConfig.OutputFormat;
import com.mongodb.kafka.connect.source.heartbeat.HeartbeatManager;
import com.mongodb.kafka.connect.source.producer.SchemaAndValueProducer;
import com.mongodb.kafka.connect.source.topic.mapping.TopicMapper;
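
// For orientation: a minimal, illustrative set of source-connector properties
// that exercise this task (example values, not defaults; see MongoSourceConfig
// for the full list of options):
//
//   connection.uri=mongodb://localhost:27017
//   database=test
//   collection=data
//   copy.existing=true
//   heartbeat.interval.ms=10000
//   poll.await.time.ms=5000
//   poll.max.batch.size=1000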

/**
 * A Kafka Connect source task that uses change streams to broadcast changes to the collection,
 * database or client.
 *
 * <h2>Copy Existing Data</h2>
 *
 * <p>If configured the connector will copy the existing data from the collection, database or
 * client. All namespaces that exist at the time of starting the task will be broadcast onto the
 * topic as insert operations. Only when all the data from all namespaces have been broadcast will
 * the change stream cursor start broadcasting new changes. The logic for copying existing data is
 * as follows:
 *
 * <ol>
 *   <li>Get the latest resumeToken from MongoDB
 *   <li>Create insert events for all configured namespaces using multiple threads. This step is
 *       completed only after all collections are successfully copied.
 *   <li>Start a change stream cursor from the saved resumeToken
 * </ol>
 *
 * <p>It should be noted that the reading of all the data during the copy and then the subsequent
 * change stream events may produce duplicated events. During the copy, clients can make changes to
 * the data in MongoDB, which may be represented both by the copying process and the change stream.
 * However, as the change stream events are idempotent, the changes can be applied so that the data
 * is eventually consistent.
 *
 * <p>It should also be noted that renaming a collection during the copying process is not
 * supported.
 *
 * <h2>Restarts</h2>
 *
 * <p>Restarting the connector during the copying phase will cause the whole copy process to
 * restart. Restarts after the copying process will resume from the last seen resumeToken.
 */
public final class MongoSourceTask extends SourceTask {
  private static final Logger LOGGER = LoggerFactory.getLogger(MongoSourceTask.class);
  private static final String CONNECTOR_TYPE = "source";
  public static final String ID_FIELD = "_id";
  private static final String COPY_KEY = "copy";
  private static final String NS_KEY = "ns";
  private static final String FULL_DOCUMENT = "fullDocument";
  private static final int NAMESPACE_NOT_FOUND_ERROR = 26;
  private static final int ILLEGAL_OPERATION_ERROR = 20;
  private static final int INVALIDATED_RESUME_TOKEN_ERROR = 260;
  private static final int UNKNOWN_FIELD_ERROR = 40415;
  private static final int FAILED_TO_PARSE_ERROR = 9;

  private static final String RESUME_TOKEN = "resume token";
  private static final String NOT_FOUND = "not found";
  private static final String DOES_NOT_EXIST = "does not exist";
  private static final String INVALID_RESUME_TOKEN = "invalid resume token";

  private final Time time;
  private final AtomicBoolean isRunning = new AtomicBoolean();
  private final AtomicBoolean isCopying = new AtomicBoolean();

  private MongoSourceConfig sourceConfig;
  private Map<String, Object> partitionMap;
  private MongoClient mongoClient;
  private HeartbeatManager heartbeatManager;

  private boolean supportsStartAfter = true;
  private boolean invalidatedCursor = false;
  private MongoCopyDataManager copyDataManager;
  private BsonDocument cachedResult;
  private BsonDocument cachedResumeToken;

  private MongoChangeStreamCursor<? extends BsonDocument> cursor;

  public MongoSourceTask() {
    this(new SystemTime());
  }

  private MongoSourceTask(final Time time) {
    this.time = time;
  }

  @Override
  public String version() {
    return Versions.VERSION;
  }

  @Override
  public void start(final Map<String, String> props) {
    LOGGER.info("Starting MongoDB source task");
    try {
      sourceConfig = new MongoSourceConfig(props);
    } catch (Exception e) {
      throw new ConnectException("Failed to start new task", e);
    }

    partitionMap = null;
    createPartitionMap(sourceConfig);

    mongoClient =
        MongoClients.create(
            sourceConfig.getConnectionString(),
            getMongoDriverInformation(CONNECTOR_TYPE, sourceConfig.getString(PROVIDER_CONFIG)));
    if (shouldCopyData()) {
      setCachedResultAndResumeToken();
      copyDataManager = new MongoCopyDataManager(sourceConfig, mongoClient);
      isCopying.set(true);
    } else {
      initializeCursorAndHeartbeatManager(time, sourceConfig, mongoClient);
    }
    isRunning.set(true);
    LOGGER.info("Started MongoDB source task");
  }

  @Override
  public List<SourceRecord> poll() {
    final long startPoll = time.milliseconds();
    LOGGER.debug("Polling Start: {}", startPoll);
    List<SourceRecord> sourceRecords = new ArrayList<>();
    TopicMapper topicMapper = sourceConfig.getTopicMapper();
    boolean publishFullDocumentOnly = sourceConfig.getBoolean(PUBLISH_FULL_DOCUMENT_ONLY_CONFIG);
    int maxBatchSize = sourceConfig.getInt(POLL_MAX_BATCH_SIZE_CONFIG);
    long nextUpdate = startPoll + sourceConfig.getLong(POLL_AWAIT_TIME_MS_CONFIG);
    Map<String, Object> partition = createPartitionMap(sourceConfig);

    SchemaAndValueProducer keySchemaAndValueProducer =
        createKeySchemaAndValueProvider(sourceConfig);
    SchemaAndValueProducer valueSchemaAndValueProducer =
        createValueSchemaAndValueProvider(sourceConfig);

    while (isRunning.get()) {
      Optional<BsonDocument> next = getNextDocument();
      long untilNext = nextUpdate - time.milliseconds();

      if (!next.isPresent()) {
        if (untilNext > 0) {
          LOGGER.debug("Waiting {} ms to poll", untilNext);
          time.sleep(untilNext);
          continue; // Re-check stop flag before continuing
        }
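        // The poll await window has elapsed with nothing new: return any batch
        // accumulated so far; failing that, let the heartbeat manager emit a
        // heartbeat record (if configured) so the source offset can still advance.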
        if (!sourceRecords.isEmpty()) {
          return sourceRecords;
        }
        if (heartbeatManager != null) {
          return heartbeatManager.heartbeat().map(Collections::singletonList).orElse(null);
        }
        return null;
      } else {
        BsonDocument changeStreamDocument = next.get();

        Map<String, String> sourceOffset = new HashMap<>();
        sourceOffset.put(ID_FIELD, changeStreamDocument.getDocument(ID_FIELD).toJson());
        if (isCopying.get()) {
          sourceOffset.put(COPY_KEY, "true");
        }

        String topicName = topicMapper.getTopic(changeStreamDocument);
        if (topicName.isEmpty()) {
          LOGGER.warn(
              "No topic set. Could not publish the message: {}", changeStreamDocument.toJson());
          return sourceRecords;
        }

        Optional<BsonDocument> valueDocument = Optional.empty();
        if (publishFullDocumentOnly) {
          if (changeStreamDocument.containsKey(FULL_DOCUMENT)
              && changeStreamDocument.get(FULL_DOCUMENT).isDocument()) {
            valueDocument = Optional.of(changeStreamDocument.getDocument(FULL_DOCUMENT));
          }
        } else {
          valueDocument = Optional.of(changeStreamDocument);
        }

        valueDocument.ifPresent(
            (valueDoc) -> {
              LOGGER.trace("Adding {} to {}: {}", valueDoc, topicName, sourceOffset);
              BsonDocument keyDocument =
                  sourceConfig.getKeyOutputFormat() == OutputFormat.SCHEMA
                      ? changeStreamDocument
                      : new BsonDocument(ID_FIELD, changeStreamDocument.get(ID_FIELD));
              createSourceRecord(
                      partition,
                      keySchemaAndValueProducer,
                      valueSchemaAndValueProducer,
                      sourceOffset,
                      topicName,
                      keyDocument,
                      valueDoc)
                  .map(sourceRecords::add);
            });

        if (sourceRecords.size() == maxBatchSize) {
          LOGGER.debug(
              "Reached '{}': {}, returning records", POLL_MAX_BATCH_SIZE_CONFIG, maxBatchSize);
          return sourceRecords;
        }
      }
    }
    return null;
  }

  private Optional<SourceRecord> createSourceRecord(
      final Map<String, Object> partition,
      final SchemaAndValueProducer keySchemaAndValueProducer,
      final SchemaAndValueProducer valueSchemaAndValueProducer,
      final Map<String, String> sourceOffset,
      final String topicName,
      final BsonDocument keyDocument,
      final BsonDocument valueDocument) {
    try {
      SchemaAndValue keySchemaAndValue = keySchemaAndValueProducer.get(keyDocument);
      SchemaAndValue valueSchemaAndValue = valueSchemaAndValueProducer.get(valueDocument);
      return Optional.of(
          new SourceRecord(
              partition,
              sourceOffset,
              topicName,
              keySchemaAndValue.schema(),
              keySchemaAndValue.value(),
              valueSchemaAndValue.schema(),
              valueSchemaAndValue.value()));
    } catch (Exception e) {
      Supplier<String> errorMessage =
          () ->
              format(
                  "Exception creating Source record for: Key=%s Value=%s",
                  keyDocument.toJson(), valueDocument.toJson());
      if (sourceConfig.logErrors()) {
        LOGGER.error(errorMessage.get(), e);
      }
      if (sourceConfig.tolerateErrors()) {
        if (sourceConfig.getString(ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG).isEmpty()) {
          return Optional.empty();
        }
        return Optional.of(
            new SourceRecord(
                partition,
                sourceOffset,
                sourceConfig.getString(ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG),
                Schema.STRING_SCHEMA,
                keyDocument.toJson(),
                Schema.STRING_SCHEMA,
                valueDocument.toJson()));
      }
      throw new DataException(errorMessage.get(), e);
    }
  }

  @Override
  @SuppressWarnings("try")
  public synchronized void stop() {
    // Synchronized because polling blocks and stop can be called from another thread
    LOGGER.info("Stopping MongoDB source task");
    isRunning.set(false);
    isCopying.set(false);

    //noinspection EmptyTryBlock
    try (MongoClient ignored3 = this.mongoClient;
        MongoChangeStreamCursor<? extends BsonDocument> ignored2 = this.cursor;
        MongoCopyDataManager ignored1 = this.copyDataManager) {
      // just using try-with-resources to ensure they all get closed, even in the case of exceptions
    }
    copyDataManager = null;
    heartbeatManager = null;
    cursor = null;
    mongoClient = null;
    supportsStartAfter = true;
    invalidatedCursor = false;
  }
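  // Cursor setup: when resuming from a stored offset the code below prefers the
  // `startAfter` change stream option and falls back to `resumeAfter` if the
  // server rejects it; see tryCreateCursor for the recovery logic.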
  void initializeCursorAndHeartbeatManager(
      final Time time, final MongoSourceConfig sourceConfig, final MongoClient mongoClient) {
    cursor = createCursor(sourceConfig, mongoClient);
    heartbeatManager =
        new HeartbeatManager(
            time,
            cursor,
            sourceConfig.getLong(HEARTBEAT_INTERVAL_MS_CONFIG),
            sourceConfig.getString(HEARTBEAT_TOPIC_NAME_CONFIG),
            partitionMap);
  }

  MongoChangeStreamCursor<? extends BsonDocument> createCursor(
      final MongoSourceConfig sourceConfig, final MongoClient mongoClient) {
    LOGGER.debug("Creating a MongoCursor");
    return tryCreateCursor(sourceConfig, mongoClient, getResumeToken(sourceConfig));
  }

  private MongoChangeStreamCursor<? extends BsonDocument> tryCreateCursor(
      final MongoSourceConfig sourceConfig,
      final MongoClient mongoClient,
      final BsonDocument resumeToken) {
    try {
      ChangeStreamIterable<Document> changeStreamIterable =
          getChangeStreamIterable(sourceConfig, mongoClient);
      if (resumeToken != null && supportsStartAfter) {
        LOGGER.info("Resuming the change stream after the previous offset: {}", resumeToken);
        changeStreamIterable.startAfter(resumeToken);
      } else if (resumeToken != null && !invalidatedCursor) {
        LOGGER.info(
            "Resuming the change stream after the previous offset using resumeAfter: {}",
            resumeToken);
        changeStreamIterable.resumeAfter(resumeToken);
      } else {
        LOGGER.info("New change stream cursor created without offset.");
      }
      return (MongoChangeStreamCursor<RawBsonDocument>)
          changeStreamIterable.withDocumentClass(RawBsonDocument.class).cursor();
    } catch (MongoCommandException e) {
      if (resumeToken != null) {
        if (invalidatedResumeToken(e)) {
          invalidatedCursor = true;
          return tryCreateCursor(sourceConfig, mongoClient, null);
        } else if (doesNotSupportsStartAfter(e)) {
          supportsStartAfter = false;
          return tryCreateCursor(sourceConfig, mongoClient, resumeToken);
        } else if (sourceConfig.tolerateErrors() && resumeTokenNotFound(e)) {
          LOGGER.warn(
              "Failed to resume change stream: {} {}\n"
                  + "===================================================================================\n"
                  + "When the resume token is no longer available there is the potential for data loss.\n\n"
                  + "Restarting the change stream with no resume token because `errors.tolerance=all`.\n"
                  + "===================================================================================\n",
              e.getErrorMessage(),
              e.getErrorCode());
          invalidatedCursor = true;
          return tryCreateCursor(sourceConfig, mongoClient, null);
        }
      }
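      // None of the token-level recovery paths above applied. Classify the
      // failure below: fatal cases throw ConnectException; otherwise returning
      // null lets a later poll attempt to recreate the cursor.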
      if (e.getErrorCode() == NAMESPACE_NOT_FOUND_ERROR) {
        LOGGER.info("Namespace not found; cursor closed.");
      } else if (e.getErrorCode() == ILLEGAL_OPERATION_ERROR) {
        LOGGER.warn(
            "Illegal $changeStream operation: {} {}\n\n"
                + "=====================================================================================\n"
                + "{}\n\n"
                + "Please Note: Not all aggregation pipeline operations are suitable for modifying the\n"
                + "change stream output. For more information, please see the official documentation:\n"
                + "   https://docs.mongodb.com/manual/changeStreams/\n"
                + "=====================================================================================\n",
            e.getErrorMessage(),
            e.getErrorCode(),
            e.getErrorMessage());
        throw new ConnectException("Illegal $changeStream operation", e);
      } else {
        LOGGER.warn(
            "Failed to resume change stream: {} {}\n\n"
                + "=====================================================================================\n"
                + "If the resume token is no longer available then there is the potential for data loss.\n"
                + "Saved resume tokens are managed by Kafka and stored with the offset data.\n\n"
                + "To restart the change stream with no resume token either: \n"
                + "  * Create a new partition name using the `offset.partition.name` configuration.\n"
                + "  * Set `errors.tolerance=all` and ignore the erroring resume token. \n"
                + "  * Manually remove the old offset from its configured storage.\n\n"
                + "Resetting the offset will allow for the connector to resume from the latest resume\n"
                + "token. Using `copy.existing=true` ensures that all data will be output by the\n"
                + "connector but it will duplicate existing data.\n"
                + "=====================================================================================\n",
            e.getErrorMessage(),
            e.getErrorCode());
        if (resumeTokenNotFound(e)) {
          throw new ConnectException(
              "ResumeToken not found. Cannot create a change stream cursor", e);
        }
      }
      return null;
    }
  }

  private boolean doesNotSupportsStartAfter(final MongoCommandException e) {
    return ((e.getErrorCode() == FAILED_TO_PARSE_ERROR || e.getErrorCode() == UNKNOWN_FIELD_ERROR)
        && e.getErrorMessage().contains("startAfter"));
  }

  private boolean invalidatedResumeToken(final MongoCommandException e) {
    return e.getErrorCode() == INVALIDATED_RESUME_TOKEN_ERROR;
  }

  private boolean resumeTokenNotFound(final MongoCommandException e) {
    String errorMessage = e.getErrorMessage().toLowerCase(Locale.ROOT);
    return errorMessage.contains(RESUME_TOKEN)
        && (errorMessage.contains(NOT_FOUND)
            || errorMessage.contains(DOES_NOT_EXIST)
            || errorMessage.contains(INVALID_RESUME_TOKEN));
  }

  Map<String, Object> createPartitionMap(final MongoSourceConfig sourceConfig) {
    if (partitionMap == null) {
      String partitionName =
          sourceConfig.getString(MongoSourceConfig.OFFSET_PARTITION_NAME_CONFIG);
      if (partitionName.isEmpty()) {
        partitionName = createDefaultPartitionName(sourceConfig);
      }
      partitionMap = singletonMap(NS_KEY, partitionName);
    }
    return partitionMap;
  }

  Map<String, Object> createLegacyPartitionMap(final MongoSourceConfig sourceConfig) {
    return singletonMap(NS_KEY, createLegacyPartitionName(sourceConfig));
  }

  String createLegacyPartitionName(final MongoSourceConfig sourceConfig) {
    return format(
        "%s/%s.%s",
        sourceConfig.getString(CONNECTION_URI_CONFIG),
        sourceConfig.getString(DATABASE_CONFIG),
        sourceConfig.getString(COLLECTION_CONFIG));
  }

  String createDefaultPartitionName(final MongoSourceConfig sourceConfig) {
    ConnectionString connectionString = sourceConfig.getConnectionString();
    StringBuilder builder = new StringBuilder();
    builder.append(connectionString.isSrvProtocol() ? "mongodb+srv://" : "mongodb://");
    builder.append(String.join(",", connectionString.getHosts()));
    builder.append("/");
    builder.append(sourceConfig.getString(DATABASE_CONFIG));
    if (!sourceConfig.getString(COLLECTION_CONFIG).isEmpty()) {
      builder.append(".");
      builder.append(sourceConfig.getString(COLLECTION_CONFIG));
    }
    return builder.toString();
  }
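  // Note: the partition map above doubles as the lookup key for Kafka's offset
  // storage, so changing `offset.partition.name` means the previously saved
  // offset is never found and the connector starts from a fresh position.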

  /**
   * Checks to see if data should be copied.
   *
   * <p>Copying data is only required if it's been configured and it hasn't already completed.
   *
   * @return true if should copy the existing data.
   */
  private boolean shouldCopyData() {
    Map<String, Object> offset = getOffset(sourceConfig);
    return sourceConfig.getBoolean(COPY_EXISTING_CONFIG)
        && (offset == null || offset.containsKey(COPY_KEY));
  }

  /**
   * This method is also responsible for caching the {@code resumeAfter} value for the change
   * stream.
   */
  private void setCachedResultAndResumeToken() {
    MongoChangeStreamCursor<ChangeStreamDocument<Document>> changeStreamCursor;
    try {
      changeStreamCursor = getChangeStreamIterable(sourceConfig, mongoClient).cursor();
    } catch (MongoCommandException e) {
      if (e.getErrorCode() == NAMESPACE_NOT_FOUND_ERROR) {
        return;
      }
      throw new ConnectException(e);
    }
    ChangeStreamDocument<Document> firstResult = changeStreamCursor.tryNext();
    if (firstResult != null) {
      cachedResult =
          new BsonDocumentWrapper<>(
              firstResult,
              ChangeStreamDocument.createCodec(
                  Document.class, MongoClientSettings.getDefaultCodecRegistry()));
    }
    cachedResumeToken =
        firstResult != null ? firstResult.getResumeToken() : changeStreamCursor.getResumeToken();
    changeStreamCursor.close();
  }
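  // The resume token cached above is consumed exactly once by getResumeToken()
  // when the first post-copy cursor is created, so the change stream resumes
  // from the position captured before the copy began.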

  /**
   * Returns the next document to be delivered to Kafka.
   *
   * <ol>
   *   <li>If copying data is in progress, returns the next result.
   *   <li>If copying data and all data has been copied and there is a cached result return the
   *       cached result.
   *   <li>Otherwise, return the next result from the change stream cursor, creating a new cursor
   *       if necessary.
   * </ol>
   *
   * @return the next document
   */
  private Optional<BsonDocument> getNextDocument() {
    if (isCopying.get()) {
      Optional<BsonDocument> result = copyDataManager.poll();
      if (result.isPresent() || copyDataManager.isCopying()) {
        return result;
      }

      // No longer copying
      LOGGER.info("Shutting down executors");
      isCopying.set(false);
      if (cachedResult != null) {
        result = Optional.of(cachedResult);
        cachedResult = null;
        return result;
      }
      LOGGER.info("Finished copying existing data from the collection(s).");
    }

    if (cursor == null) {
      initializeCursorAndHeartbeatManager(time, sourceConfig, mongoClient);
    }

    if (cursor != null) {
      try {
        BsonDocument next = cursor.tryNext();
        // The cursor has been closed by the server
        if (next == null && cursor.getServerCursor() == null) {
          invalidateCursorAndReinitialize();
          next = cursor != null ? cursor.tryNext() : null;
        }
        return Optional.ofNullable(next);
      } catch (Exception e) {
        if (cursor != null) {
          try {
            cursor.close();
          } catch (Exception e1) {
            // ignore
          }
          cursor = null;
        }
        if (isRunning.get()) {
          LOGGER.info(
              "An exception occurred when trying to get the next item from the Change Stream: {}",
              e.getMessage());
        }
        return Optional.empty();
      }
    }
    return Optional.empty();
  }

  private void invalidateCursorAndReinitialize() {
    invalidatedCursor = true;
    cursor.close();
    cursor = null;
    initializeCursorAndHeartbeatManager(time, sourceConfig, mongoClient);
  }

  private ChangeStreamIterable<Document> getChangeStreamIterable(
      final MongoSourceConfig sourceConfig, final MongoClient mongoClient) {
    String database = sourceConfig.getString(DATABASE_CONFIG);
    String collection = sourceConfig.getString(COLLECTION_CONFIG);

    Optional<List<Document>> pipeline = sourceConfig.getPipeline();
    ChangeStreamIterable<Document> changeStream;
    if (database.isEmpty()) {
      LOGGER.info("Watching all changes on the cluster");
      changeStream = pipeline.map(mongoClient::watch).orElse(mongoClient.watch());
    } else if (collection.isEmpty()) {
      LOGGER.info("Watching for database changes on '{}'", database);
      MongoDatabase db = mongoClient.getDatabase(database);
      changeStream = pipeline.map(db::watch).orElse(db.watch());
    } else {
      LOGGER.info("Watching for collection changes on '{}.{}'", database, collection);
      MongoCollection<Document> coll =
          mongoClient.getDatabase(database).getCollection(collection);
      changeStream = pipeline.map(coll::watch).orElse(coll.watch());
    }

    int batchSize = sourceConfig.getInt(BATCH_SIZE_CONFIG);
    if (batchSize > 0) {
      changeStream.batchSize(batchSize);
    }
    sourceConfig.getFullDocument().ifPresent(changeStream::fullDocument);
    sourceConfig.getCollation().ifPresent(changeStream::collation);
    return changeStream;
  }

  Map<String, Object> getOffset(final MongoSourceConfig sourceConfig) {
    if (context != null) {
      Map<String, Object> offset =
          context.offsetStorageReader().offset(createPartitionMap(sourceConfig));
      if (offset == null
          && sourceConfig.getString(MongoSourceConfig.OFFSET_PARTITION_NAME_CONFIG).isEmpty()) {
        offset = context.offsetStorageReader().offset(createLegacyPartitionMap(sourceConfig));
      }
      return offset;
    }
    return null;
  }

  BsonDocument getResumeToken(final MongoSourceConfig sourceConfig) {
    BsonDocument resumeToken = null;
    if (cachedResumeToken != null) {
      resumeToken = cachedResumeToken;
      cachedResumeToken = null;
    } else if (invalidatedCursor) {
      invalidatedCursor = false;
    } else {
      Map<String, Object> offset = getOffset(sourceConfig);
      if (offset != null && offset.containsKey(ID_FIELD) && !offset.containsKey(COPY_KEY)) {
        resumeToken = BsonDocument.parse((String) offset.get(ID_FIELD));
        if (offset.containsKey(HEARTBEAT_KEY)) {
          LOGGER.info("Resume token from heartbeat: {}", resumeToken);
        }
      }
    }
    return resumeToken;
  }
}



