/*
* Copyright (C) 2020 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.versioned.mongodb;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings;
import com.mongodb.WriteConcern;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.InsertManyOptions;
import com.mongodb.client.model.UpdateOptions;
import com.mongodb.client.result.UpdateResult;
import com.mongodb.reactivestreams.client.MongoClient;
import com.mongodb.reactivestreams.client.MongoClients;
import com.mongodb.reactivestreams.client.MongoCollection;
import com.mongodb.reactivestreams.client.MongoDatabase;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.bson.BsonBinary;
import org.bson.BsonReader;
import org.bson.BsonWriter;
import org.bson.Document;
import org.bson.codecs.Codec;
import org.bson.codecs.DecoderContext;
import org.bson.codecs.EncoderContext;
import org.bson.codecs.configuration.CodecProvider;
import org.bson.codecs.configuration.CodecRegistries;
import org.bson.codecs.configuration.CodecRegistry;
import org.projectnessie.versioned.impl.EntityStoreHelper;
import org.projectnessie.versioned.impl.condition.ConditionExpression;
import org.projectnessie.versioned.impl.condition.UpdateExpression;
import org.projectnessie.versioned.store.Id;
import org.projectnessie.versioned.store.LoadOp;
import org.projectnessie.versioned.store.LoadStep;
import org.projectnessie.versioned.store.NotFoundException;
import org.projectnessie.versioned.store.SaveOp;
import org.projectnessie.versioned.store.Store;
import org.projectnessie.versioned.store.StoreOperationException;
import org.projectnessie.versioned.store.ValueType;
import org.projectnessie.versioned.tiered.BaseValue;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
/**
 * This class implements the {@link Store} interface that is used by Nessie as a backing store for
 * versioning of its Git-like behaviour. The MongoDBStore connects to an external MongoDB server.
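 *
 * <p>A minimal usage sketch (the config values are hypothetical, shown only for illustration):
 *
 * <pre>{@code
 * MongoStoreConfig config = ...; // connection string, database name, timeout, table prefix
 * MongoDBStore store = new MongoDBStore(config);
 * store.start(); // connects and maps one collection per ValueType
 * // ... use the Store API: putIfAbsent, put, save, load, loadSingle, getValues ...
 * store.close(); // closes the underlying MongoClient
 * }</pre>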
*/
public class MongoDBStore implements Store {
/** Pair of a collection to the set of IDs to be loaded. */
private static class CollectionLoadIds {
    final ValueType<?> type;
    final MongoCollection<Document> collection;
    final List<Id> ids;
    CollectionLoadIds(ValueType<?> type, MongoCollection<Document> collection, List<Id> ops) {
this.type = type;
this.collection = collection;
this.ids = ops;
}
}
  // Mongo has a 16MB limit on documents, which also pertains to the input query. Given that we
  // use IN for loads, restrict the number of IDs to avoid going above that limit, and to take
  // advantage of the async nature of the requests.
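  // As a sanity check (an order-of-magnitude estimate, not a measured figure): 1,000 binary IDs
  // of a few dozen bytes each put a single IN query in the tens of kilobytes, comfortably below
  // the 16MB ceiling.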
@VisibleForTesting static final int LOAD_SIZE = 1_000;
private final MongoStoreConfig config;
private final MongoClientSettings mongoClientSettings;
private MongoClient mongoClient;
private MongoDatabase mongoDatabase;
private final Duration timeout;
  private Map<ValueType<?>, MongoCollection<Document>> collections;
/**
* Creates a store ready for connection to a MongoDB instance.
*
* @param config the configuration for the store.
*/
public MongoDBStore(MongoStoreConfig config) {
this.config = config;
this.timeout = Duration.ofMillis(config.getTimeoutMs());
this.collections = new HashMap<>();
this.mongoClientSettings =
MongoClientSettings.builder()
.applyConnectionString(new ConnectionString(config.getConnectionString()))
.codecRegistry(
CodecRegistries.fromProviders(
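                  // Ad-hoc provider: serve the custom Id codec for Id, and return null for every
                  // other class so that lookup falls through to the default registry below.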
                  new CodecProvider() {
                    @SuppressWarnings("unchecked")
                    @Override
                    public <T> Codec<T> get(Class<T> clazz, CodecRegistry registry) {
                      return clazz == Id.class ? (Codec<T>) ID_CODEC_INSTANCE : null;
                    }
                  },
MongoClientSettings.getDefaultCodecRegistry()))
.writeConcern(WriteConcern.MAJORITY)
.build();
}
/**
 * Gets a handle to an existing database, or a handle to a new lazily-created MongoDatabase
 * instance if it does not yet exist. Since MongoDB creates databases and collections on first
 * use, there is no need to validate the presence of either before they are used. This creates or
 * retrieves collections that map 1:1 to the enum values in {@link
 * org.projectnessie.versioned.store.ValueType}.
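 *
 * <p>For example (an illustrative prefix, not a fixed convention): with a table prefix of
 * {@code "nessie_"}, a value type whose table name is {@code "l1"} would be backed by the MongoDB
 * collection {@code "nessie_l1"}.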
*/
@Override
public void start() {
mongoClient = MongoClients.create(mongoClientSettings);
mongoDatabase = mongoClient.getDatabase(config.getDatabaseName());
// Initialise collections for each ValueType.
collections =
ValueType.values().stream()
.collect(
                ImmutableMap.<ValueType<?>, ValueType<?>, MongoCollection<Document>>toImmutableMap(
v -> v,
v -> {
String collectionName = v.getTableName(config.getTablePrefix());
return mongoDatabase.getCollection(collectionName);
}));
if (config.initializeDatabase()) {
// make sure we have an empty l1 (ignore result, doesn't matter)
EntityStoreHelper.storeMinimumEntities(this::putIfAbsent);
}
}
/**
* Closes the connection this manager creates to a database. If the connection is already closed
* this method has no effect.
*/
@Override
public void close() {
if (null != mongoClient) {
mongoClient.close();
}
}
@SuppressWarnings({"unchecked", "rawtypes"})
@Override
public void load(LoadStep loadstep) throws NotFoundException {
for (LoadStep step = loadstep; step != null; step = step.getNext().orElse(null)) {
      final Map<Id, LoadOp<?>> idLoadOps =
step.getOps().collect(Collectors.toMap(LoadOp::getId, Function.identity()));
Flux.fromStream(step.getOps())
.groupBy(LoadOp::getValueType)
.flatMap(
entry -> {
                ValueType<?> type = entry.key();
                MongoCollection<Document> collection = getCollection(type);
return entry
.map(LoadOp::getId)
.buffer(LOAD_SIZE)
.map(l -> new CollectionLoadIds(type, collection, l));
})
.flatMap(entry -> entry.collection.find(Filters.in(MongoBaseValue.ID, entry.ids)))
.handle(
(op, sink) -> {
// Process each of the loaded entries.
Id id = MongoSerDe.deserializeId(op, MongoBaseValue.ID);
                LoadOp<?> loadOp = idLoadOps.remove(id);
MongoSerDe.produceToConsumer(op, loadOp.getValueType(), loadOp.getReceiver());
loadOp.done();
})
.blockLast(timeout);
// Check if there were any missed ops.
      final Collection<String> missedIds =
idLoadOps.values().stream().map(e -> e.getId().toString()).collect(Collectors.toList());
if (!missedIds.isEmpty()) {
throw new NotFoundException(
String.format("Requested object IDs missing: %s", String.join(", ", missedIds)));
}
}
}
@Override
  public <C extends BaseValue<C>> boolean putIfAbsent(SaveOp<C> saveOp) {
    final MongoCollection<Document> collection = getCollection(saveOp.getType());
    // Use upsert so that a document is created if the filter does not match. The update operator
    // is only $setOnInsert, so no action is triggered on a simple update, only on insert.
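    // Illustrative shape of the issued command (pseudo-BSON, payload abridged):
    //   filter: { _id: <id> }, update: { $setOnInsert: { ...serialized entity... } }, upsert: true
    // If a document with this _id already exists, the update matches it but changes nothing, so
    // getUpsertedId() stays null and the method returns false.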
final UpdateResult result =
Mono.from(
collection.updateOne(
Filters.eq(MongoBaseValue.ID, saveOp.getId()),
MongoSerDe.bsonForValueType(saveOp, "$setOnInsert"),
new UpdateOptions().upsert(true)))
.block(timeout);
return result != null && result.getUpsertedId() != null;
}
@Override
  public <C extends BaseValue<C>> void put(
      SaveOp<C> saveOp, Optional<ConditionExpression> conditionUnAliased) {
// TODO: Handle ConditionExpressions.
if (conditionUnAliased.isPresent()) {
throw new UnsupportedOperationException(
"ConditionExpressions are not supported with MongoDB yet.");
}
    final MongoCollection<Document> collection = getCollection(saveOp.getType());
// Use upsert so that if an item does not exist, it will be inserted.
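    // Unlike putIfAbsent, this uses $set, so an existing document's fields are overwritten in
    // place; a missing document is still created via the upsert.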
final UpdateResult result =
Mono.from(
collection.updateOne(
Filters.eq(MongoBaseValue.ID, saveOp.getId()),
MongoSerDe.bsonForValueType(saveOp, "$set"),
new UpdateOptions().upsert(true)))
.block(timeout);
    if (result == null || (result.getModifiedCount() != 1 && result.getUpsertedId() == null)) {
throw new StoreOperationException(
String.format(
"Update of %s %s did not succeed", saveOp.getType().name(), saveOp.getId()));
}
}
@Override
  public <C extends BaseValue<C>> boolean delete(
      ValueType<C> type, Id id, Optional<ConditionExpression> condition) {
throw new UnsupportedOperationException();
}
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
  public void save(List<SaveOp<?>> ops) {
    Map<ValueType<?>, List<SaveOp<?>>> perType =
ops.stream().collect(Collectors.groupingBy(SaveOp::getType));
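    // ordered(false) lets MongoDB keep inserting the remaining documents of a batch even if an
    // individual insert fails, and the per-type grouping yields one insertMany per collection.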
Flux.fromIterable(perType.entrySet())
.flatMap(
entry ->
((MongoCollection) writeCollection(entry.getKey()))
.insertMany(entry.getValue(), new InsertManyOptions().ordered(false)))
.blockLast(timeout);
}
@Override
  public <C extends BaseValue<C>> void loadSingle(ValueType<C> valueType, Id id, C consumer) {
    final MongoCollection<Document> collection = getCollection(valueType);
Document found = Mono.from(collection.find(Filters.eq(MongoBaseValue.ID, id))).block(timeout);
if (null == found) {
throw new NotFoundException(String.format("Unable to load item with ID: %s", id));
}
MongoSerDe.produceToConsumer(found, valueType, consumer);
}
@Override
  public <C extends BaseValue<C>> boolean update(
      ValueType<C> type,
      Id id,
      UpdateExpression update,
      Optional<ConditionExpression> condition,
      Optional<BaseValue<C>> consumer)
throws NotFoundException {
throw new UnsupportedOperationException();
}
@Override
  public <C extends BaseValue<C>> Stream<Acceptor<C>> getValues(ValueType<C> type) {
// TODO: Can this be optimized to not collect the elements before streaming them?
// TODO: Could this benefit from paging?
return Flux.from(this.getCollection(type).find())
.toStream()
.map(d -> producer -> MongoSerDe.produceToConsumer(d, type, producer));
}
/** Clear the contents of all the Nessie collections. Only for testing purposes. */
@VisibleForTesting
void resetCollections() {
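    // Filters.ne("_id", "s") matches every stored document, since IDs are binary values and
    // never the literal string "s"; deleteMany therefore empties each collection.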
Flux.fromIterable(collections.values())
.flatMap(collection -> collection.deleteMany(Filters.ne("_id", "s")))
.blockLast(timeout);
}
@SuppressWarnings("rawtypes")
  private MongoCollection writeCollection(ValueType<?> type) {
    Codec<SaveOp> codec =
        new Codec<SaveOp>() {
@Override
public SaveOp decode(BsonReader bsonReader, DecoderContext decoderContext) {
throw new UnsupportedOperationException();
}
@SuppressWarnings("unchecked")
@Override
public void encode(BsonWriter bsonWriter, SaveOp o, EncoderContext encoderContext) {
MongoSerDe.serializeEntity(bsonWriter, o);
}
@Override
          public Class<SaveOp> getEncoderClass() {
return SaveOp.class;
}
};
return this.getCollection(type, codec);
}
  private MongoCollection<Document> getCollection(ValueType<?> valueType, Codec<?> codec) {
return getCollection(valueType)
.withCodecRegistry(
new CodecRegistry() {
@Override
              public <T> Codec<T> get(Class<T> clazz, CodecRegistry codecRegistry) {
return get(clazz);
}
@SuppressWarnings("unchecked")
@Override
              public <T> Codec<T> get(Class<T> clazz) {
                return (Codec<T>) (clazz == Id.class ? ID_CODEC_INSTANCE : codec);
}
});
}
  private MongoCollection<Document> getCollection(ValueType<?> valueType) {
return checkNotNull(
collections.get(valueType), "Unsupported Entity type: %s", valueType.name());
}
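  /** Codec that (de)serializes {@link Id} values as raw BSON binary data. */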
  private static class IdCodec implements Codec<Id> {
@Override
public Id decode(BsonReader bsonReader, DecoderContext decoderContext) {
return Id.of(bsonReader.readBinaryData().getData());
}
@Override
public void encode(BsonWriter bsonWriter, Id id, EncoderContext encoderContext) {
bsonWriter.writeBinaryData(new BsonBinary(id.toBytes()));
}
@Override
    public Class<Id> getEncoderClass() {
return Id.class;
}
}
static final IdCodec ID_CODEC_INSTANCE = new IdCodec();
}