All downloads are free. The search and download functionalities use the official Maven repository.

com.mongodb.kafka.connect.source.MongoCopyDataManager Maven / Gradle / Ivy

There is a newer version: 1.13.1
Show newest version
/*
 * Copyright 2008-present MongoDB, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mongodb.kafka.connect.source;

import static com.mongodb.kafka.connect.source.MongoSourceConfig.COLLECTION_CONFIG;
import static com.mongodb.kafka.connect.source.MongoSourceConfig.DATABASE_CONFIG;
import static java.lang.String.format;
import static java.util.Collections.singletonList;
import static java.util.stream.Collectors.toList;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import org.apache.kafka.connect.errors.ConnectException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.bson.BsonBinaryReader;
import org.bson.BsonDocument;
import org.bson.BsonType;
import org.bson.RawBsonDocument;
import org.bson.conversions.Bson;

import com.mongodb.MongoNamespace;
import com.mongodb.client.MongoClient;

import com.mongodb.kafka.connect.source.MongoSourceConfig.StartupConfig.CopyExistingConfig;

/**
 * Copy Data Manager
 *
 * 
    *
  1. Gets all namespaces to copy. eg. A single collection, all collections in a database or all * collections in all databases. *
  2. For each namespace, submit to the executors an copy existing task. *
  3. Each copy existing task, runs an aggregation pipeline on the namespace, to mimic an insert * document and adds the results to the queue. *
  4. The poll method returns documents from the queue. *
*/ class MongoCopyDataManager implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(MongoCopyDataManager.class); private static final String NAMESPACE_FIELD = "ns"; static final String ALT_NAMESPACE_FIELD = "__"; private static final byte[] NAMESPACE_BYTES = NAMESPACE_FIELD.getBytes(StandardCharsets.UTF_8); private static final String PIPELINE_TEMPLATE = format( "{$replaceRoot: " + "{newRoot: {" + "_id: {_id: '$_id', copyingData: true}, " + "operationType: 'insert', " + "%s: {db: '%%s', coll: '%%s'}" + "documentKey: {_id: '$_id'}, " + "fullDocument: '$$ROOT'}}" + "}", NAMESPACE_FIELD); private static final BsonDocument ADD_ALT_NAMESPACE_STAGE = BsonDocument.parse( format("{'$addFields': {'%s': '$%s'}}", ALT_NAMESPACE_FIELD, NAMESPACE_FIELD)); private static final BsonDocument UNSET_ORIGINAL_NAMESPACE_STAGE = BsonDocument.parse(format("{'$project': {'%s': 0}}", NAMESPACE_FIELD)); private volatile boolean closed; private volatile Exception errorException; private final AtomicInteger namespacesToCopy; private final MongoSourceConfig sourceConfig; private final MongoClient mongoClient; private final ExecutorService executor; private final ArrayBlockingQueue queue; MongoCopyDataManager(final MongoSourceConfig sourceConfig, final MongoClient mongoClient) { this.sourceConfig = sourceConfig; this.mongoClient = mongoClient; List namespaces = selectNamespaces(sourceConfig, mongoClient); LOGGER.info("Copying existing data on the following namespaces: {}", namespaces); namespacesToCopy = new AtomicInteger(namespaces.size()); CopyExistingConfig copyConfig = sourceConfig.getStartupConfig().copyExistingConfig(); queue = new ArrayBlockingQueue<>(copyConfig.queueSize()); executor = Executors.newFixedThreadPool( Math.max(1, Math.min(namespaces.size(), copyConfig.maxThreads()))); namespaces.forEach(n -> executor.submit(() -> copyDataFrom(n))); } Optional poll() { if (errorException != null) { if (!closed) { close(); } throw new 
ConnectException(errorException); } if (namespacesToCopy.get() == 0) { close(); } return Optional.ofNullable(queue.poll()); } boolean isCopying() { return namespacesToCopy.get() > 0 || !queue.isEmpty(); } @Override public void close() { if (!closed) { closed = true; LOGGER.debug("Shutting down copy data manager executors"); executor.shutdownNow(); } } private void copyDataFrom(final MongoNamespace namespace) { LOGGER.debug("Copying existing data from: {}", namespace.getFullName()); try { mongoClient .getDatabase(namespace.getDatabaseName()) .getCollection(namespace.getCollectionName(), RawBsonDocument.class) .aggregate(createPipeline(sourceConfig, namespace)) .allowDiskUse(sourceConfig.getStartupConfig().copyExistingConfig().allowDiskUse()) .forEach(this::putToQueue); namespacesToCopy.decrementAndGet(); } catch (Exception e) { errorException = e; } } private void putToQueue(final RawBsonDocument bsonDocument) { try { queue.put(convertDocument(bsonDocument)); } catch (InterruptedException e) { throw new RuntimeException(e); } } static List selectNamespaces( final MongoSourceConfig sourceConfig, final MongoClient mongoClient) { String database = sourceConfig.getString(DATABASE_CONFIG); String collection = sourceConfig.getString(COLLECTION_CONFIG); String namespacesRegex = sourceConfig.getStartupConfig().copyExistingConfig().namespaceRegex(); List namespaces; if (database.isEmpty()) { namespaces = getCollections(mongoClient); } else if (collection.isEmpty()) { namespaces = getCollections(mongoClient, database); } else { namespaces = singletonList(createNamespace(database, collection)); } if (!namespacesRegex.isEmpty()) { Predicate predicate = Pattern.compile(namespacesRegex).asPredicate(); namespaces = namespaces.stream().filter(n -> predicate.test(n.getFullName())).collect(toList()); } return namespaces; } static List createPipeline(final MongoSourceConfig cfg, final MongoNamespace namespace) { List pipeline = new ArrayList<>(); 
cfg.getStartupConfig().copyExistingConfig().pipeline().map(pipeline::addAll); pipeline.add( BsonDocument.parse( format(PIPELINE_TEMPLATE, namespace.getDatabaseName(), namespace.getCollectionName()))); cfg.getPipeline().map(pipeline::addAll); pipeline.add(ADD_ALT_NAMESPACE_STAGE); pipeline.add(UNSET_ORIGINAL_NAMESPACE_STAGE); return pipeline; } static RawBsonDocument convertDocument(final RawBsonDocument original) { ByteBuffer sourceBuffer = original.getByteBuffer().asNIO(); BsonBinaryReader reader = new BsonBinaryReader(sourceBuffer); int currentPosition = 0; reader.readStartDocument(); while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) { if (reader.readName().equals(ALT_NAMESPACE_FIELD)) { currentPosition++; // Adjust the current position to include the bson type byte[] sourceBytes = sourceBuffer.array(); // Convert the namespace field in situ for (byte namespaceByte : NAMESPACE_BYTES) { sourceBytes[currentPosition++] = namespaceByte; } return original; } reader.skipValue(); currentPosition = reader.getBsonInput().getPosition(); } return original; } private static List getCollections(final MongoClient mongoClient) { return mongoClient.listDatabaseNames().into(new ArrayList<>()).stream() .filter(s -> !(s.startsWith("admin") || s.startsWith("config") || s.startsWith("local"))) .map(d -> getCollections(mongoClient, d)) .flatMap(Collection::stream) .collect(toList()); } private static List getCollections( final MongoClient mongoClient, final String database) { return mongoClient.getDatabase(database).listCollectionNames().into(new ArrayList<>()).stream() .filter(s -> !s.startsWith("system.")) .map(c -> createNamespace(database, c)) .collect(toList()); } private static MongoNamespace createNamespace(final String database, final String collection) { return new MongoNamespace(database, collection); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy