org.elasticsearch.river.mongodb.CollectionSlurper
MongoDB River for ElasticSearch
package org.elasticsearch.river.mongodb;
import java.util.concurrent.atomic.AtomicLong;
import org.bson.BasicBSONObject;
import org.bson.types.ObjectId;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.river.mongodb.util.MongoDBHelper;
import org.elasticsearch.river.mongodb.util.MongoDBRiverHelper;
import com.google.common.base.Preconditions;
import com.mongodb.BasicDBObject;
import com.mongodb.CommandResult;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoCursorNotFoundException;
import com.mongodb.MongoInterruptedException;
import com.mongodb.MongoSocketException;
import com.mongodb.MongoTimeoutException;
import com.mongodb.QueryOperators;
import com.mongodb.gridfs.GridFS;
import com.mongodb.gridfs.GridFSDBFile;
import com.mongodb.gridfs.GridFSFile;
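/**
 * Performs the initial bulk import of one or more MongoDB collections into
 * the Elasticsearch index: it pages over documents in {@code _id} order and
 * pushes {@link Operation#INSERT} entries onto the river's shared queue.
 */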
class CollectionSlurper extends MongoDBRiverComponent {
private final MongoDBRiverDefinition definition;
private final SharedContext context;
private final Client esClient;
private final MongoClient mongoClient;
private final DB slurpedDb;
private final AtomicLong totalDocuments = new AtomicLong();
public CollectionSlurper(MongoDBRiver river, MongoClient mongoClient) {
super(river);
this.definition = river.definition;
this.context = river.context;
this.esClient = river.esClient;
this.mongoClient = mongoClient;
this.slurpedDb = mongoClient.getDB(definition.getMongoDb());
}
/**
* Imports the initial contents of the collection(s) selected by the {@code definition}.
*
* @param timestamp the timestamp to use for the last imported document
*/
public void importInitial(Timestamp<?> timestamp) {
try {
if (!isIndexEmpty()) {
// MongoDB would delete the index and re-attempt the import
// We should probably do that too or at least have an option for it
// https://groups.google.com/d/msg/mongodb-user/hrOuS-lpMeI/opP6l0gndSEJ
logger.error("Cannot import collection {} into existing index", definition.getMongoCollection());
MongoDBRiverHelper.setRiverStatus(
esClient, definition.getRiverName(), Status.INITIAL_IMPORT_FAILED);
return;
}
if (definition.isImportAllCollections()) {
for (String name : slurpedDb.getCollectionNames()) {
// Skip MongoDB system collections (system.indexes, system.users, ...)
if (!name.startsWith("system.")) {
DBCollection collection = slurpedDb.getCollection(name);
importCollection(collection, timestamp);
}
}
} else {
DBCollection collection = slurpedDb.getCollection(definition.getMongoCollection());
importCollection(collection, timestamp);
}
} catch (MongoInterruptedException | InterruptedException e) {
logger.error("river-mongodb slurper interrupted");
Thread.currentThread().interrupt();
} catch (Exception e) {
logger.error("Exception in initial import", e);
logger.debug("Total documents inserted so far: {}", totalDocuments.get());
Thread.currentThread().interrupt();
}
}
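/** Returns {@code true} if the target Elasticsearch index contains no documents yet. */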
protected boolean isIndexEmpty() {
return MongoDBRiver.getIndexCount(esClient, definition) == 0;
}
/**
* Imports a single collection into the index.
*
* @param collection the collection to import
* @param timestamp the timestamp to use for the last imported document
* @throws InterruptedException
* if the blocking queue stream is interrupted while waiting
*/
public void importCollection(DBCollection collection, Timestamp<?> timestamp) throws InterruptedException {
// TODO: ensure the index type is empty
// DBCollection slurpedCollection =
// slurpedDb.getCollection(definition.getMongoCollection());
logger.info("MongoDBRiver is beginning initial import of " + collection.getFullName());
boolean inProgress = true;
String lastId = null;
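// lastId tracks the highest _id queued so far. If the cursor dies (see the
// socket/timeout/cursor-not-found catch below), the loop restarts the query
// from {_id: {$gt: lastId}} instead of re-importing from the beginning.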
while (inProgress) {
DBCursor cursor = null;
try {
if (definition.isDisableIndexRefresh()) {
updateIndexRefresh(definition.getIndexName(), -1L);
}
if (!definition.isMongoGridFS()) {
if (logger.isTraceEnabled()) {
// Note: collection.count() is expensive on TokuMX
logger.trace("Collection {} - count: {}", collection.getName(), safeCount(collection, timestamp.getClass()));
}
long count = 0;
cursor = collection
.find(getFilterForInitialImport(definition.getMongoCollectionFilter(), lastId))
.sort(new BasicDBObject("_id", 1));
while (cursor.hasNext() && context.getStatus() == Status.RUNNING) {
DBObject object = cursor.next();
count++;
if (cursor.hasNext()) {
lastId = addInsertToStream(null, applyFieldFilter(object), collection.getName());
} else {
logger.debug("Last entry for initial import of {} - add timestamp: {}", collection.getFullName(), timestamp);
lastId = addInsertToStream(timestamp, applyFieldFilter(object), collection.getName());
}
}
inProgress = false;
logger.info("Number of documents indexed in initial import of {}: {}", collection.getFullName(), count);
} else {
// TODO: To be optimized.
// https://github.com/mongodb/mongo-java-driver/pull/48#issuecomment-25241988
// possible option: get the object id list from the .fs collection, then call GridFS.findOne
GridFS grid = new GridFS(mongoClient.getDB(definition.getMongoDb()), definition.getMongoCollection());
cursor = grid.getFileList();
while (cursor.hasNext()) {
DBObject object = cursor.next();
if (object instanceof GridFSDBFile) {
GridFSDBFile file = grid.findOne(new ObjectId(object.get(MongoDBRiver.MONGODB_ID_FIELD).toString()));
if (cursor.hasNext()) {
lastId = addInsertToStream(null, file);
} else {
logger.debug("Last entry for initial import of {} - add timestamp: {}", collection.getFullName(), timestamp);
lastId = addInsertToStream(timestamp, file);
}
}
}
inProgress = false;
}
} catch (MongoSocketException | MongoTimeoutException | MongoCursorNotFoundException e) {
logger.info("Initial import - {} - {}. Will retry.", e.getClass().getSimpleName(), e.getMessage());
logger.debug("Total documents inserted so far: {}", totalDocuments.get());
Thread.sleep(MongoDBRiver.MONGODB_RETRY_ERROR_DELAY_MS);
} finally {
if (cursor != null) {
logger.trace("Closing initial import cursor");
cursor.close();
}
if (definition.isDisableIndexRefresh()) {
updateIndexRefresh(definition.getIndexName(), TimeValue.timeValueSeconds(1));
}
}
}
}
/**
* Only calls DBCollection.count() when using vanilla MongoDB; otherwise gets estimate from collection.getStats()
*/
private String safeCount(DBCollection collection, Class<? extends Timestamp> type) {
if (type.equals(Timestamp.BSON.class)) {
return "" + collection.count(); // Vanilla MongoDB can quickly return precise count
}
CommandResult stats = collection.getStats();
return "~" + (!stats.containsField("count") ? 0l : stats.getLong("count"));
}
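/**
 * Builds the query used to start or resume the initial import. With a
 * non-empty user filter and a non-null {@code id}, the resulting query is:
 * <pre>{@code
 * { "$and": [ <filter>, { "_id": { "$gt": <id> } } ] }
 * }</pre>
 */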
private BasicDBObject getFilterForInitialImport(BasicDBObject filter, String id) {
Preconditions.checkNotNull(filter);
if (id == null) {
return filter;
}
BasicDBObject idFilter = new BasicDBObject(MongoDBRiver.MONGODB_ID_FIELD, new BasicBSONObject(QueryOperators.GT, id));
if (filter.equals(new BasicDBObject())) {
return idFilter;
}
return new BasicDBObject(QueryOperators.AND, ImmutableList.of(filter, idFilter));
}
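/**
 * Updates {@code index.refresh_interval} on the target index. Passing {@code -1}
 * disables refresh for the duration of the bulk import; the finally block in
 * {@link #importCollection} restores it to one second.
 */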
private void updateIndexRefresh(String name, Object value) {
esClient.admin().indices().prepareUpdateSettings(name).setSettings(ImmutableMap.of("index.refresh_interval", value)).get();
}
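/**
 * Applies the river's include/exclude field lists to a document. For GridFS
 * files only the metadata sub-document is filtered.
 */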
private DBObject applyFieldFilter(DBObject object) {
if (object instanceof GridFSFile) {
GridFSFile file = (GridFSFile) object;
DBObject metadata = file.getMetaData();
if (metadata != null) {
file.setMetaData(applyFieldFilter(metadata));
}
} else {
object = MongoDBHelper.applyExcludeFields(object, definition.getExcludeFields());
object = MongoDBHelper.applyIncludeFields(object, definition.getIncludeFields());
}
return object;
}
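/**
 * Queues an INSERT for {@code data} and returns its {@code _id} as a string
 * (or {@code null} if absent), which the import loop uses as its resume point.
 */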
private String addInsertToStream(final Timestamp<?> currentTimestamp, final DBObject data) throws InterruptedException {
return addInsertToStream(currentTimestamp, data, definition.getMongoCollection());
}
private String addInsertToStream(final Timestamp<?> currentTimestamp, final DBObject data, final String collection)
throws InterruptedException {
totalDocuments.incrementAndGet();
addToStream(Operation.INSERT, currentTimestamp, data, collection);
if (data == null) {
return null;
} else {
return data.containsField(MongoDBRiver.MONGODB_ID_FIELD) ? data.get(MongoDBRiver.MONGODB_ID_FIELD).toString() : null;
}
}
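/**
 * Puts one entry on the river's blocking queue. A DROP_DATABASE operation is
 * fanned out as a DROP_COLLECTION entry per collection when the river is
 * configured to import all collections.
 */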
private void addToStream(final Operation operation, final Timestamp<?> currentTimestamp, final DBObject data, final String collection)
throws InterruptedException {
if (logger.isTraceEnabled()) {
String dataString = String.valueOf(data); // avoids an NPE in trace logging when data is null
if (dataString.length() > 400) {
logger.trace("addToStream - operation [{}], currentTimestamp [{}], data (_id:[{}], serialized length:{}), collection [{}]",
operation, currentTimestamp, data.get("_id"), dataString.length(), collection);
} else {
logger.trace("addToStream - operation [{}], currentTimestamp [{}], data [{}], collection [{}]",
operation, currentTimestamp, dataString, collection);
}
}
if (operation == Operation.DROP_DATABASE) {
logger.info("addToStream - Operation.DROP_DATABASE, currentTimestamp [{}], data [{}], collection [{}]",
currentTimestamp, data, collection);
if (definition.isImportAllCollections()) {
for (String name : slurpedDb.getCollectionNames()) {
logger.info("addToStream - isImportAllCollections - Operation.DROP_DATABASE, currentTimestamp [{}], data [{}], collection [{}]",
currentTimestamp, data, name);
context.getStream().put(new MongoDBRiver.QueueEntry(currentTimestamp, Operation.DROP_COLLECTION, data, name));
}
} else {
context.getStream().put(new MongoDBRiver.QueueEntry(currentTimestamp, Operation.DROP_COLLECTION, data, collection));
}
} else {
context.getStream().put(new MongoDBRiver.QueueEntry(currentTimestamp, operation, data, collection));
}
}
}
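// Minimal usage sketch (hypothetical: the river normally constructs and drives
// this class itself, so the `river`, `mongoClient` and `startTimestamp` names
// below are illustrative only):
//
//     CollectionSlurper slurper = new CollectionSlurper(river, mongoClient);
//     Timestamp<?> startTimestamp = ...; // oplog position captured before the import
//     slurper.importInitial(startTimestamp);
//
// Once importInitial returns, the queued entries are drained by the river's
// indexer, which then continues tailing the oplog from startTimestamp.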