com.couchbase.client.core.transaction.cleanup.TransactionsCleaner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of core-io Show documentation
Show all versions of core-io Show documentation
The official Couchbase JVM Core IO Library
/*
* Copyright 2022 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.couchbase.client.core.transaction.cleanup;
import com.couchbase.client.core.Core;
import com.couchbase.client.core.annotation.Stability;
import com.couchbase.client.core.cnc.Event;
import com.couchbase.client.core.cnc.RequestTracer;
import com.couchbase.client.core.cnc.TracingIdentifiers;
import com.couchbase.client.core.io.CollectionIdentifier;
import com.couchbase.client.core.msg.kv.SubdocCommandType;
import com.couchbase.client.core.msg.kv.SubdocGetResponse;
import com.couchbase.client.core.msg.kv.SubdocMutateRequest;
import com.couchbase.client.core.transaction.components.ActiveTransactionRecordEntry;
import com.couchbase.client.core.transaction.components.ActiveTransactionRecordUtil;
import com.couchbase.client.core.transaction.components.DocRecord;
import com.couchbase.client.core.transaction.components.DocumentGetter;
import com.couchbase.client.core.transaction.components.DurabilityLevelUtil;
import com.couchbase.client.core.transaction.forwards.ForwardCompatibility;
import com.couchbase.client.core.transaction.forwards.ForwardCompatibilityStage;
import com.couchbase.client.core.transaction.forwards.Supported;
import com.couchbase.client.core.transaction.support.AttemptState;
import com.couchbase.client.core.transaction.support.OptionsUtil;
import com.couchbase.client.core.transaction.support.SpanWrapper;
import com.couchbase.client.core.transaction.support.SpanWrapperUtil;
import com.couchbase.client.core.transaction.support.TransactionFields;
import com.couchbase.client.core.transaction.CoreTransactionGetResult;
import com.couchbase.client.core.transaction.error.internal.ErrorClass;
import com.couchbase.client.core.cnc.events.transaction.TransactionCleanupAttemptEvent;
import com.couchbase.client.core.transaction.log.CoreTransactionLogger;
import com.couchbase.client.core.transaction.util.DebugUtil;
import com.couchbase.client.core.transaction.util.TransactionKVHandler;
import com.couchbase.client.core.transaction.util.TriFunction;
import com.couchbase.client.core.transaction.util.MeteringUnits;
import com.couchbase.client.core.util.Bytes;
import com.couchbase.client.core.util.CbPreconditions;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.util.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
/**
* Handles cleaning up expired/lost transactions.
*
* @author Graham Pople
*/
@Stability.Internal
public class TransactionsCleaner {
private final Core core;
private final CleanerHooks hooks;
private final static int BEING_LOGGING_FAILED_CLEANUPS_AT_WARN_AFTER_X_MINUTES = 60 * 2;
public TransactionsCleaner(Core core, CleanerHooks hooks) {
this.core = Objects.requireNonNull(core);
this.hooks = Objects.requireNonNull(hooks);
}
private Duration kvDurableTimeout() {
return core.context().environment().timeoutConfig().kvDurableTimeout();
}
private Duration kvNonMutatingTimeout() {
return core.context().environment().timeoutConfig().kvTimeout();
}
Mono cleanupDocs(CoreTransactionLogger perEntryLog, CleanupRequest req, SpanWrapper pspan) {
String attemptId = req.attemptId();
switch (req.state()) {
case COMMITTED: {
// Half-finished commit to complete
Mono inserts = commitDocs(perEntryLog, attemptId, req.stagedInserts(), req, pspan);
Mono replaces = commitDocs(perEntryLog, attemptId, req.stagedReplaces(), req, pspan);
Mono removes = removeDocsStagedForRemoval(perEntryLog, attemptId, req.stagedRemoves(), req, pspan);
return inserts
.then(replaces)
.then(removes);
}
case ABORTED: {
// Half-finished rollback to complete
Mono inserts = removeDocs(perEntryLog, attemptId, req.stagedInserts(), req, pspan);
// This will just remove the xattrs, which is exactly what we want
Mono replaces = removeTxnLinks(perEntryLog, attemptId, req.stagedReplaces(), req, pspan);
Mono removes = removeTxnLinks(perEntryLog, attemptId, req.stagedRemoves(), req, pspan);
return inserts
.then(replaces)
.then(removes);
}
case PENDING:
// Not much to do here, as don't have the ids of the docs involved in the txn. Leave it. All reads
// will ignore it.
perEntryLog.logDefer(req.attemptId(), "No docs cleanup possible as txn in state %s, just removing",
Event.Severity.DEBUG, req.state());
return Mono.empty();
case COMPLETED:
case ROLLED_BACK:
case NOT_STARTED:
default:
perEntryLog.logDefer(req.attemptId(), "No docs cleanup to do as txn in state %s, just removing",
Event.Severity.DEBUG, req.state());
return Mono.empty();
}
}
private Mono commitDocs(CoreTransactionLogger perEntryLog,
String attemptId,
List docs,
CleanupRequest req,
SpanWrapper pspan) {
return doPerDoc(perEntryLog, attemptId, docs, pspan, true, (collection, doc, lir) -> {
CbPreconditions.check(doc.links().isDocumentInTransaction());
CbPreconditions.check(doc.links().stagedContent().isPresent());
byte[] content = doc.links().stagedContent().get().getBytes(StandardCharsets.UTF_8);
return hooks.beforeCommitDoc.apply(doc.id()) // Testing hook
.then(Mono.defer(() -> {
if (lir.isDeleted()) {
return TransactionKVHandler.insert(core, collection, doc.id(), content, kvDurableTimeout(),
req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::commitDocsInsert"), pspan);
} else {
List commands = Arrays.asList(
new SubdocMutateRequest.Command(SubdocCommandType.DELETE, TransactionFields.TRANSACTION_INTERFACE_PREFIX_ONLY, null, false, true, false, 0),
new SubdocMutateRequest.Command(SubdocCommandType.SET_DOC, "", content, false, false, false, 1)
);
return TransactionKVHandler.mutateIn(core, collection, doc.id(), kvDurableTimeout(),
false, false, false,
lir.isDeleted(), false, doc.cas(),
req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::commitDocs"), pspan,
commands);
}
}))
.doOnSubscribe(v -> {
perEntryLog.logDefer(attemptId, "removing txn links and writing content to doc %s",
Event.Severity.DEBUG, DebugUtil.docId(doc));
})
.then();
});
}
/**
* Transaction is ABORTED, rolling back a replace or remove by removing its links
*/
private Mono removeTxnLinks(CoreTransactionLogger perEntryLog,
String attemptId,
List docs,
CleanupRequest req,
SpanWrapper pspan) {
return doPerDoc(perEntryLog, attemptId, docs, pspan, false, (collectionIdentifier, doc, lir) -> {
return hooks.beforeRemoveLinks.apply(doc.id())
.then(TransactionKVHandler.mutateIn(core, collectionIdentifier, doc.id(), kvDurableTimeout(),
false, false, false,
lir.isDeleted(), false, doc.cas(),
req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::removeTxnLinks"), pspan, Arrays.asList(
new SubdocMutateRequest.Command(SubdocCommandType.DELETE, TransactionFields.TRANSACTION_INTERFACE_PREFIX_ONLY, Bytes.EMPTY_BYTE_ARRAY, false, true, false, 0)
)))
.doOnSubscribe(v -> perEntryLog.logDefer(attemptId, "removing txn links from doc %s",
Event.Severity.DEBUG, DebugUtil.docId(doc)))
.then();
});
}
private Mono removeDocsStagedForRemoval(CoreTransactionLogger perEntryLog,
String attemptId,
List docs,
CleanupRequest req,
SpanWrapper pspan) {
return doPerDoc(perEntryLog, attemptId, docs, pspan, true, (collection, doc, lir) -> {
if (doc.links().isDocumentBeingRemoved()) {
return hooks.beforeRemoveDocStagedForRemoval.apply(doc.id()) // testing hook
.then(TransactionKVHandler.remove(core, collection, doc.id(), kvDurableTimeout(),
doc.cas(), req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::removeDoc"), pspan))
.doOnSubscribe(v -> perEntryLog.debug(attemptId, "removing doc %s", doc.id()))
.then();
} else {
return Mono.create(v -> {
perEntryLog.debug(attemptId, "doc %s does not have expected remove indication, skipping",
DebugUtil.docId(doc));
v.success();
});
}
});
}
/**
* Transaction is ABORTED, rolling back a staged insert.
*/
private Mono removeDocs(CoreTransactionLogger perEntryLog,
String attemptId,
List docs,
CleanupRequest req,
SpanWrapper pspan) {
return doPerDoc(perEntryLog, attemptId, docs, pspan, false, (collection, doc, lir) -> {
return hooks.beforeRemoveDoc.apply(doc.id())
.then(Mono.defer(() -> {
if (lir.isDeleted()) {
return TransactionKVHandler.mutateIn(core, collection, doc.id(), kvDurableTimeout(),
false, false, false,
true, false, doc.cas(),
req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::commitDocs"), pspan,
Collections.singletonList(
new SubdocMutateRequest.Command(SubdocCommandType.DELETE, TransactionFields.TRANSACTION_INTERFACE_PREFIX_ONLY, Bytes.EMPTY_BYTE_ARRAY, false, true, false, 0)
));
} else {
return TransactionKVHandler.remove(core, collection, doc.id(), kvDurableTimeout(),
doc.cas(), req.durabilityLevel(), OptionsUtil.createClientContext("Cleaner::removeDocs"), pspan);
}
}))
.doOnSubscribe(v -> perEntryLog.debug(attemptId, "removing doc %s",
DebugUtil.docId(doc)))
.then();
});
}
private Mono doPerDoc(CoreTransactionLogger perEntryLog,
String attemptId,
List docs,
SpanWrapper pspan,
boolean requireCrc32ToMatchStaging,
TriFunction> perDoc) {
return Flux.fromIterable(docs)
.publishOn(core.context().environment().transactionsSchedulers().schedulerCleanup())
.concatMap(docRecord -> {
CollectionIdentifier collection = new CollectionIdentifier(docRecord.bucketName(),
Optional.of(docRecord.scopeName()), Optional.of(docRecord.collectionName()));
MeteringUnits.MeteringUnitsBuilder units = new MeteringUnits.MeteringUnitsBuilder();
return hooks.beforeDocGet.apply(docRecord.id())
.then(doPerDocGotDoc(perEntryLog,
attemptId,
pspan,
requireCrc32ToMatchStaging,
perDoc,
docRecord,
collection,
units));
})
.then();
}
private Mono doPerDocGotDoc(CoreTransactionLogger perEntryLog,
String attemptId,
SpanWrapper pspan,
boolean requireCrc32ToMatchStaging,
TriFunction> perDoc,
DocRecord docRecord,
CollectionIdentifier collection,
MeteringUnits.MeteringUnitsBuilder units) {
return DocumentGetter.justGetDoc(core, collection, docRecord.id(), kvNonMutatingTimeout(), pspan, true, perEntryLog, units)
.flatMap(docOpt -> {
if (docOpt.isPresent()) {
CoreTransactionGetResult doc = docOpt.get().getT1();
SubdocGetResponse lir = docOpt.get().getT2();
MeteringUnits built = units.build();
perEntryLog.debug(attemptId, "handling doc %s with cas %d " +
"and links %s, isTombstone=%s%s",
DebugUtil.docId(doc), doc.cas(), doc.links(), lir.isDeleted(), DebugUtil.dbg(built));
if (!doc.links().isDocumentInTransaction()) {
// The txn probably committed this doc then crashed. This is fine, can skip.
perEntryLog.debug(attemptId, "no staged content for doc %s, assuming it was committed and skipping",
DebugUtil.docId(doc));
return Mono.empty();
} else if (!doc.links().stagedAttemptId().get().equals(attemptId)) {
perEntryLog.debug(attemptId, "for doc %s, staged version is for a " +
"different attempt %s, skipping",
DebugUtil.docId(doc),
doc.links().stagedAttemptId().get());
return Mono.empty();
} else {
// This field is only present if cleaning up a protocol 2 transaction.
if (requireCrc32ToMatchStaging && doc.links().crc32OfStaging().isPresent()) {
String crc32WhenStaging = doc.links().crc32OfStaging().get();
// This field must always be present since the doc has just been fetched.
String crc32Now = doc.crc32OfGet().get();
perEntryLog.debug(attemptId, "checking whether document %s has changed since staging, crc32 then %s now %s",
DebugUtil.docId(doc), crc32WhenStaging, crc32Now);
if (!crc32Now.equals(crc32WhenStaging)) {
perEntryLog.warn(attemptId, "document %s has changed since staging, ignoring it to avoid data loss",
DebugUtil.docId(doc));
return Mono.empty();
}
}
return perDoc.apply(collection, doc, lir);
}
} else {
perEntryLog.debug(attemptId, "could not get doc %s, skipping",
DebugUtil.docId(collection, docRecord.id()));
return Mono.empty();
}
})
.onErrorResume(err -> {
ErrorClass ec = ErrorClass.classify(err);
perEntryLog.debug(attemptId, "got exception while handling doc %s: %s",
DebugUtil.docId(collection, docRecord.id()), DebugUtil.dbg(err));
if (ec == ErrorClass.FAIL_CAS_MISMATCH) {
// Cleanup is conservative. It could be running hours, even days after the
// transaction originally committed. If the document has changed since it
// was staged, fail this cleanup attempt. It will be tried again later.
perEntryLog.debug(attemptId, "got CAS mismatch while cleaning up doc %s, " +
"failing this cleanup attempt (it will be retried)",
DebugUtil.docId(collection, docRecord.id()));
return Mono.error(err);
} else {
return Mono.error(err);
}
});
}
private RequestTracer tracer() {
return core.context().environment().requestTracer();
}
/**
* Kept purely for backwards compatibility with FIT performer.
*/
public Mono cleanupATREntry(CollectionIdentifier atrCollection,
String atrId,
String attemptId,
ActiveTransactionRecordEntry atrEntry,
boolean isRegularCleanup) {
CleanupRequest req = CleanupRequest.fromAtrEntry(atrCollection, atrEntry);
return performCleanup(req, isRegularCleanup, null);
}
/*
* Cleans up an expired attempt.
*
* Called from lost and regular.
*
* Pre-condition: This is an expired attempt that should be cleaned up. This code will not check that.
*/
public Mono performCleanup(CleanupRequest req,
boolean isRegularCleanup,
@Nullable SpanWrapper pspan) {
SpanWrapper span = SpanWrapperUtil.createOp(null, tracer(), req.atrCollection(), req.atrId(), TracingIdentifiers.TRANSACTION_CLEANUP, pspan)
.attribute(TracingIdentifiers.ATTR_TRANSACTION_ATTEMPT_ID, req.attemptId())
.attribute(TracingIdentifiers.ATTR_TRANSACTION_AGE, req.ageMillis())
.attribute(TracingIdentifiers.ATTR_TRANSACTION_STATE, req.state());
req.durabilityLevel().ifPresent(v -> {
span.lowCardinalityAttribute(TracingIdentifiers.ATTR_DURABILITY, DurabilityLevelUtil.convertDurabilityLevel(v));
});
return Mono.defer(() -> {
CollectionIdentifier atrCollection = req.atrCollection();
String atrId = req.atrId();
String attemptId = req.attemptId();
CoreTransactionLogger perEntryLog = new CoreTransactionLogger(core.context().environment().eventBus(),
ActiveTransactionRecordUtil.getAtrDebug(atrCollection, atrId).toString());
Event.Severity logLevel = Event.Severity.DEBUG;
// if (!isRegularCleanup) {
// Cleanup of lost txns should be rare enough to make them log worthy
// Update: nope. If client crashes could have thousands of these to remove.
// logLevel = Event.Severity.INFO;
// }
perEntryLog.logDefer(attemptId, "Cleaning up ATR entry (isRegular=%s) %s", logLevel, isRegularCleanup,
req);
Mono cleanupDocs = cleanupDocs(perEntryLog, req, span);
Mono