All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.pulsar.io.mongodb.MongoSource Maven / Gradle / Ivy

There is a newer version: 4.0.0.6
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.io.mongodb;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.client.model.changestream.ChangeStreamDocument;
import com.mongodb.client.model.changestream.FullDocument;
import com.mongodb.reactivestreams.client.ChangeStreamPublisher;
import com.mongodb.reactivestreams.client.MongoClient;
import com.mongodb.reactivestreams.client.MongoClients;
import com.mongodb.reactivestreams.client.MongoCollection;
import com.mongodb.reactivestreams.client.MongoDatabase;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.pulsar.functions.api.Record;
import org.apache.pulsar.io.core.PushSource;
import org.apache.pulsar.io.core.SourceContext;
import org.apache.pulsar.io.core.annotations.Connector;
import org.apache.pulsar.io.core.annotations.IOType;
import org.bson.Document;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;

/**
 * The base class for MongoDB sources.
 */
@Connector(
        name = "mongo",
        type = IOType.SOURCE,
        help = "A source connector that sends mongodb documents to pulsar",
        configClass = MongoConfig.class
)
@Slf4j
public class MongoSource extends PushSource {

    private final Supplier clientProvider;

    private MongoConfig mongoConfig;

    private MongoClient mongoClient;

    private Thread streamThread;

    private ChangeStreamPublisher stream;


    public MongoSource() {
        this(null);
    }

    public MongoSource(Supplier clientProvider) {
        this.clientProvider = clientProvider;
    }

    @Override
    public void open(Map config, SourceContext sourceContext) throws Exception {
        log.info("Open MongoDB Source");

        mongoConfig = MongoConfig.load(config);
        mongoConfig.validate(false, false);

        if (clientProvider != null) {
            mongoClient = clientProvider.get();
        } else {
            mongoClient = MongoClients.create(mongoConfig.getMongoUri());
        }

        if (StringUtils.isEmpty(mongoConfig.getDatabase())) {
            // Watch all databases
            log.info("Watch all");
            stream = mongoClient.watch();

        } else {
            final MongoDatabase db = mongoClient.getDatabase(mongoConfig.getDatabase());

            if (StringUtils.isEmpty(mongoConfig.getCollection())) {
                // Watch all collections in a database
                log.info("Watch db: {}", db.getName());
                stream = db.watch();

            } else {
                // Watch a collection

                final MongoCollection collection = db.getCollection(mongoConfig.getCollection());
                log.info("Watch collection: {} {}", db.getName(), mongoConfig.getCollection());
                stream = collection.watch();
            }
        }

        stream.batchSize(mongoConfig.getBatchSize()).fullDocument(FullDocument.UPDATE_LOOKUP);

        stream.subscribe(new Subscriber>() {
            private ObjectMapper mapper = new ObjectMapper();
            private Subscription subscription;

            @Override
            public void onSubscribe(Subscription subscription) {
                this.subscription = subscription;
                this.subscription.request(Integer.MAX_VALUE);
            }

            @Override
            public void onNext(ChangeStreamDocument doc) {
                try {
                    log.info("New change doc: {}", doc);

                    // Build a record with the essential information
                    final Map recordValue = new HashMap<>();
                    recordValue.put("fullDocument", doc.getFullDocument());
                    recordValue.put("ns", doc.getNamespace());
                    recordValue.put("operation", doc.getOperationType());

                    consume(new DocRecord(
                            Optional.of(doc.getDocumentKey().toJson()),
                            mapper.writeValueAsString(recordValue).getBytes(StandardCharsets.UTF_8)));

                } catch (JsonProcessingException e) {
                    log.error("Processing doc from mongo", e);
                }
            }

            @Override
            public void onError(Throwable error) {
                log.error("Subscriber error", error);
            }

            @Override
            public void onComplete() {
                log.info("Subscriber complete");
            }
        });

    }

    @Override
    public void close() throws Exception {
        if (mongoClient != null) {
            mongoClient.close();
        }
    }

    @Data
    private static class DocRecord implements Record {
        private final Optional key;
        private final byte[] value;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy