org.apache.cassandra.db.repair.PendingAntiCompaction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db.repair;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListenableFutureTask;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Uninterruptibles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.compaction.CompactionInfo;
import org.apache.cassandra.db.compaction.CompactionManager;
import org.apache.cassandra.db.compaction.OperationType;
import org.apache.cassandra.db.lifecycle.LifecycleTransaction;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.sstable.metadata.StatsMetadata;
import org.apache.cassandra.locator.RangesAtEndpoint;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.utils.concurrent.Refs;
import static org.apache.cassandra.service.ActiveRepairService.NO_PENDING_REPAIR;
import static org.apache.cassandra.service.ActiveRepairService.UNREPAIRED_SSTABLE;
/**
* Performs an anti compaction on a set of tables and token ranges, isolating the unrepaired sstables
* for a give token range into a pending repair group so they can't be compacted with other sstables
* while they are being repaired.
*/
public class PendingAntiCompaction
{
private static final Logger logger = LoggerFactory.getLogger(PendingAntiCompaction.class);
private static final int ACQUIRE_SLEEP_MS = Integer.getInteger("cassandra.acquire_sleep_ms", 1000);
private static final int ACQUIRE_RETRY_SECONDS = Integer.getInteger("cassandra.acquire_retry_seconds", 60);
public static class AcquireResult
{
final ColumnFamilyStore cfs;
final Refs refs;
final LifecycleTransaction txn;
AcquireResult(ColumnFamilyStore cfs, Refs refs, LifecycleTransaction txn)
{
this.cfs = cfs;
this.refs = refs;
this.txn = txn;
}
@VisibleForTesting
public void abort()
{
if (txn != null)
txn.abort();
if (refs != null)
refs.release();
}
}
static class SSTableAcquisitionException extends RuntimeException
{
SSTableAcquisitionException(String message)
{
super(message);
}
}
@VisibleForTesting
static class AntiCompactionPredicate implements Predicate
{
private final Collection> ranges;
private final UUID prsid;
public AntiCompactionPredicate(Collection> ranges, UUID prsid)
{
this.ranges = ranges;
this.prsid = prsid;
}
public boolean apply(SSTableReader sstable)
{
if (!sstable.intersects(ranges))
return false;
StatsMetadata metadata = sstable.getSSTableMetadata();
// exclude repaired sstables
if (metadata.repairedAt != UNREPAIRED_SSTABLE)
return false;
if (!sstable.descriptor.version.hasPendingRepair())
{
String message = String.format("Prepare phase failed because it encountered legacy sstables that don't " +
"support pending repair, run upgradesstables before starting incremental " +
"repairs, repair session (%s)", prsid);
throw new SSTableAcquisitionException(message);
}
// exclude sstables pending repair, but record session ids for
// non-finalized sessions for a later error message
if (metadata.pendingRepair != NO_PENDING_REPAIR)
{
if (!ActiveRepairService.instance.consistent.local.isSessionFinalized(metadata.pendingRepair))
{
String message = String.format("Prepare phase for incremental repair session %s has failed because it encountered " +
"intersecting sstables belonging to another incremental repair session (%s). This is " +
"caused by starting an incremental repair session before a previous one has completed. " +
"Check nodetool repair_admin for hung sessions and fix them.", prsid, metadata.pendingRepair);
throw new SSTableAcquisitionException(message);
}
return false;
}
Collection cis = CompactionManager.instance.active.getCompactionsForSSTable(sstable, OperationType.ANTICOMPACTION);
if (cis != null && !cis.isEmpty())
{
// todo: start tracking the parent repair session id that created the anticompaction to be able to give a better error messsage here:
StringBuilder sb = new StringBuilder();
sb.append("Prepare phase for incremental repair session ");
sb.append(prsid);
sb.append(" has failed because it encountered intersecting sstables belonging to another incremental repair session. ");
sb.append("This is caused by starting multiple conflicting incremental repairs at the same time. ");
sb.append("Conflicting anticompactions: ");
for (CompactionInfo ci : cis)
sb.append(ci.getTaskId() == null ? "no compaction id" : ci.getTaskId()).append(':').append(ci.getSSTables()).append(',');
throw new SSTableAcquisitionException(sb.toString());
}
return true;
}
}
public static class AcquisitionCallable implements Callable
{
private final ColumnFamilyStore cfs;
private final UUID sessionID;
private final AntiCompactionPredicate predicate;
private final int acquireRetrySeconds;
private final int acquireSleepMillis;
@VisibleForTesting
public AcquisitionCallable(ColumnFamilyStore cfs, Collection> ranges, UUID sessionID, int acquireRetrySeconds, int acquireSleepMillis)
{
this(cfs, sessionID, acquireRetrySeconds, acquireSleepMillis, new AntiCompactionPredicate(ranges, sessionID));
}
@VisibleForTesting
AcquisitionCallable(ColumnFamilyStore cfs, UUID sessionID, int acquireRetrySeconds, int acquireSleepMillis, AntiCompactionPredicate predicate)
{
this.cfs = cfs;
this.sessionID = sessionID;
this.predicate = predicate;
this.acquireRetrySeconds = acquireRetrySeconds;
this.acquireSleepMillis = acquireSleepMillis;
}
@SuppressWarnings("resource")
private AcquireResult acquireTuple()
{
// this method runs with compactions stopped & disabled
try
{
// using predicate might throw if there are conflicting ranges
Set sstables = cfs.getLiveSSTables().stream().filter(predicate).collect(Collectors.toSet());
if (sstables.isEmpty())
return new AcquireResult(cfs, null, null);
LifecycleTransaction txn = cfs.getTracker().tryModify(sstables, OperationType.ANTICOMPACTION);
if (txn != null)
return new AcquireResult(cfs, Refs.ref(sstables), txn);
else
logger.error("Could not mark compacting for {} (sstables = {}, compacting = {})", sessionID, sstables, cfs.getTracker().getCompacting());
}
catch (SSTableAcquisitionException e)
{
logger.warn(e.getMessage());
logger.debug("Got exception trying to acquire sstables", e);
}
return null;
}
public AcquireResult call()
{
logger.debug("acquiring sstables for pending anti compaction on session {}", sessionID);
// try to modify after cancelling running compactions. This will attempt to cancel in flight compactions including the given sstables for
// up to a minute, after which point, null will be returned
long start = System.currentTimeMillis();
long delay = TimeUnit.SECONDS.toMillis(acquireRetrySeconds);
// Note that it is `predicate` throwing SSTableAcquisitionException if it finds a conflicting sstable
// and we only retry when runWithCompactionsDisabled throws when uses the predicate, not when acquireTuple is.
// This avoids the case when we have an sstable [0, 100] and a user starts a repair on [0, 50] and then [51, 100] before
// anticompaction has finished but not when the second repair is [25, 75] for example - then we will fail it without retry.
do
{
try
{
// Note that anticompactions are not disabled when running this. This is safe since runWithCompactionsDisabled
// is synchronized - acquireTuple and predicate can only be run by a single thread (for the given cfs).
return cfs.runWithCompactionsDisabled(this::acquireTuple, predicate, false, false, false);
}
catch (SSTableAcquisitionException e)
{
logger.warn("Session {} failed acquiring sstables: {}, retrying every {}ms for another {}s",
sessionID,
e.getMessage(),
acquireSleepMillis,
TimeUnit.SECONDS.convert(delay + start - System.currentTimeMillis(), TimeUnit.MILLISECONDS));
Uninterruptibles.sleepUninterruptibly(acquireSleepMillis, TimeUnit.MILLISECONDS);
if (System.currentTimeMillis() - start > delay)
logger.warn("{} Timed out waiting to acquire sstables", sessionID, e);
}
catch (Throwable t)
{
logger.error("Got exception disabling compactions for session {}", sessionID, t);
throw t;
}
} while (System.currentTimeMillis() - start < delay);
return null;
}
}
static class AcquisitionCallback implements AsyncFunction, Object>
{
private final UUID parentRepairSession;
private final RangesAtEndpoint tokenRanges;
private final BooleanSupplier isCancelled;
public AcquisitionCallback(UUID parentRepairSession, RangesAtEndpoint tokenRanges, BooleanSupplier isCancelled)
{
this.parentRepairSession = parentRepairSession;
this.tokenRanges = tokenRanges;
this.isCancelled = isCancelled;
}
ListenableFuture> submitPendingAntiCompaction(AcquireResult result)
{
return CompactionManager.instance.submitPendingAntiCompaction(result.cfs, tokenRanges, result.refs, result.txn, parentRepairSession, isCancelled);
}
private static boolean shouldAbort(AcquireResult result)
{
if (result == null)
return true;
// sstables in the acquire result are now marked compacting and are locked to this anti compaction. If any
// of them are marked repaired or pending repair, acquisition raced with another pending anti-compaction, or
// possibly even a repair session, and we need to abort to prevent sstables from moving between sessions.
return result.refs != null && Iterables.any(result.refs, sstable -> {
StatsMetadata metadata = sstable.getSSTableMetadata();
return metadata.pendingRepair != NO_PENDING_REPAIR || metadata.repairedAt != UNREPAIRED_SSTABLE;
});
}
public ListenableFuture apply(List results) throws Exception
{
if (Iterables.any(results, AcquisitionCallback::shouldAbort))
{
// Release all sstables, and report failure back to coordinator
for (AcquireResult result : results)
{
if (result != null)
{
logger.info("Releasing acquired sstables for {}.{}", result.cfs.metadata.keyspace, result.cfs.metadata.name);
result.abort();
}
}
String message = String.format("Prepare phase for incremental repair session %s was unable to " +
"acquire exclusive access to the neccesary sstables. " +
"This is usually caused by running multiple incremental repairs on nodes that share token ranges",
parentRepairSession);
logger.warn(message);
return Futures.immediateFailedFuture(new SSTableAcquisitionException(message));
}
else
{
List> pendingAntiCompactions = new ArrayList<>(results.size());
for (AcquireResult result : results)
{
if (result.txn != null)
{
ListenableFuture> future = submitPendingAntiCompaction(result);
pendingAntiCompactions.add(future);
}
}
return Futures.allAsList(pendingAntiCompactions);
}
}
}
private final UUID prsId;
private final Collection tables;
private final RangesAtEndpoint tokenRanges;
private final ExecutorService executor;
private final int acquireRetrySeconds;
private final int acquireSleepMillis;
private final BooleanSupplier isCancelled;
public PendingAntiCompaction(UUID prsId,
Collection tables,
RangesAtEndpoint tokenRanges,
ExecutorService executor,
BooleanSupplier isCancelled)
{
this(prsId, tables, tokenRanges, ACQUIRE_RETRY_SECONDS, ACQUIRE_SLEEP_MS, executor, isCancelled);
}
@VisibleForTesting
PendingAntiCompaction(UUID prsId,
Collection tables,
RangesAtEndpoint tokenRanges,
int acquireRetrySeconds,
int acquireSleepMillis,
ExecutorService executor,
BooleanSupplier isCancelled)
{
this.prsId = prsId;
this.tables = tables;
this.tokenRanges = tokenRanges;
this.executor = executor;
this.acquireRetrySeconds = acquireRetrySeconds;
this.acquireSleepMillis = acquireSleepMillis;
this.isCancelled = isCancelled;
}
public ListenableFuture run()
{
List> tasks = new ArrayList<>(tables.size());
for (ColumnFamilyStore cfs : tables)
{
cfs.forceBlockingFlush();
ListenableFutureTask task = ListenableFutureTask.create(getAcquisitionCallable(cfs, tokenRanges.ranges(), prsId, acquireRetrySeconds, acquireSleepMillis));
executor.submit(task);
tasks.add(task);
}
ListenableFuture> acquisitionResults = Futures.successfulAsList(tasks);
ListenableFuture compactionResult = Futures.transformAsync(acquisitionResults, getAcquisitionCallback(prsId, tokenRanges), MoreExecutors.directExecutor());
return compactionResult;
}
@VisibleForTesting
protected AcquisitionCallable getAcquisitionCallable(ColumnFamilyStore cfs, Set> ranges, UUID prsId, int acquireRetrySeconds, int acquireSleepMillis)
{
return new AcquisitionCallable(cfs, ranges, prsId, acquireRetrySeconds, acquireSleepMillis);
}
@VisibleForTesting
protected AcquisitionCallback getAcquisitionCallback(UUID prsId, RangesAtEndpoint tokenRanges)
{
return new AcquisitionCallback(prsId, tokenRanges, isCancelled);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy