/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tephra;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.AbstractService;
import com.google.inject.Inject;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongArraySet;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.tephra.manager.InvalidTxList;
import org.apache.tephra.metrics.DefaultMetricsCollector;
import org.apache.tephra.metrics.MetricsCollector;
import org.apache.tephra.persist.NoOpTransactionStateStorage;
import org.apache.tephra.persist.TransactionEdit;
import org.apache.tephra.persist.TransactionLog;
import org.apache.tephra.persist.TransactionLogReader;
import org.apache.tephra.persist.TransactionSnapshot;
import org.apache.tephra.persist.TransactionStateStorage;
import org.apache.tephra.snapshot.SnapshotCodecProvider;
import org.apache.tephra.util.TxUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
* This is the central place to manage all active transactions in the system.
*
* A transaction consists of
*
* - A write pointer: This is the version used for all writes of that transaction.
* - A read pointer: All reads under the transaction use this as an upper bound for the version.
* - A set of excluded versions: These are the write versions of other transactions that must be excluded from
* reads, because those transactions are still in progress, or they failed but couldn't be properly rolled back.
*
* To use the transaction system, a client must follow this sequence of steps:
*
* - Request a new transaction.
* - Use the transaction to read and write datasets. Datasets are encouraged to cache the writes of the
* transaction in memory, to reduce the cost of rollback in case the transaction fails.
* - Check whether the transaction has conflicts. For this, the set of change keys are submitted via canCommit(),
* and the transaction manager verifies that none of these keys are in conflict with other transactions that
* committed since the start of this transaction.
* - If the transaction has conflicts:
*
* - Roll back the changes in every dataset that was changed. This can happen in-memory if the
* changes were cached.
* - Abort the transaction to remove it from the active transactions.
*
* - If the transaction has no conflicts:
*
* - Persist all dataset changes to storage.
* - Commit the transaction. This will repeat the conflict detection, because more overlapping transactions
* may have committed since the first conflict check.
* - If the transaction has conflicts:
*
* - Roll back the changes in every dataset that was changed. This is more expensive because
* changes must be undone in persistent storage.
* - Abort the transaction to remove it from the active transactions.
*
* Transactions may be short or long-running. A short transaction is started with a timeout, and if it is not
* committed before that timeout, it is invalidated and excluded from future reads. A long-running transaction has
* no timeout and will remain active until it is committed or aborted. Long transactions are typically used in
* map/reduce jobs and can produce enormous amounts of changes. Therefore, long transactions do not participate in
* conflict detection (they would almost always have conflicts). We also assume that the changes of long transactions
* are not tracked, and therefore cannot be rolled back. Hence, when a long transaction is aborted, it remains in the
* list of excluded transactions to make its writes invisible.
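*
* A minimal sketch of this sequence against the manager (the dataset reads, writes and rollback are application
* code and are elided; the change key shown is purely illustrative):
*
* <pre>{@code
* TransactionManager txManager = new TransactionManager(new Configuration());
* txManager.startAndWait();
*
* Transaction tx = txManager.startShort();
* try {
*   // ... perform reads and writes with tx, remembering the key of every change ...
*   Collection<byte[]> changes =
*     Collections.singletonList("row1".getBytes(StandardCharsets.UTF_8));
*   txManager.canCommit(tx.getTransactionId(), changes);            // first conflict check
*   // ... persist the buffered changes to storage ...
*   txManager.commit(tx.getTransactionId(), tx.getWritePointer());  // second conflict check, then commit
* } catch (TransactionFailureException e) {
*   // ... roll back the changes in the datasets ...
*   txManager.abort(tx);
* }
* }</pre>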
*/
public class TransactionManager extends AbstractService {
// todo: optimize heavily
private static final Logger LOG = LoggerFactory.getLogger(TransactionManager.class);
// poll every 1 second to check whether a snapshot is needed
private static final long SNAPSHOT_POLL_INTERVAL = 1000L;
// poll every 10 seconds to emit metrics
private static final long METRICS_POLL_INTERVAL = 10000L;
// Client id that is used if a client doesn't provide one while starting a transaction.
private static final String DEFAULT_CLIENTID = "unknown";
// Transactions that are in progress, with their info.
private final NavigableMap<Long, InProgressTx> inProgress = new ConcurrentSkipListMap<>();
// the list of transactions that are invalid (not properly committed/aborted, or timed out)
private final InvalidTxList invalidTxList = new InvalidTxList();
// todo: use moving array instead (use Long2ObjectMap in fastutil)
// todo: should this be consolidated with inProgress?
// commit time next writePointer -> changes made by this tx
private final NavigableMap<Long, ChangeSet> committedChangeSets = new ConcurrentSkipListMap<>();
// not committed yet
private final Map<Long, ChangeSet> committingChangeSets = Maps.newConcurrentMap();
private long readPointer;
private long lastWritePointer;
private MetricsCollector txMetricsCollector;
private final TransactionStateStorage persistor;
private final int cleanupInterval;
private final int defaultTimeout;
private final int defaultLongTimeout;
private final int maxTimeout;
private DaemonThreadExecutor cleanupThread = null;
private volatile TransactionLog currentLog;
// timestamp of the last completed snapshot
private long lastSnapshotTime;
// frequency in millis to perform snapshots
private final long snapshotFrequencyInSeconds;
// number of most recent snapshots to retain
private final int snapshotRetainCount;
private DaemonThreadExecutor snapshotThread;
private DaemonThreadExecutor metricsThread;
// retention of client id for transactions - this affects memory footprint
private final boolean retainClientId;
private final boolean retainClientIdPastCommit;
// lock guarding change of the current transaction log
private final ReentrantReadWriteLock logLock = new ReentrantReadWriteLock();
private final Lock logReadLock = logLock.readLock();
private final Lock logWriteLock = logLock.writeLock();
private final int changeSetCountLimit;
private final int changeSetCountThreshold;
private final long changeSetSizeLimit;
private final long changeSetSizeThreshold;
// fudge factor (in milliseconds) used when interpreting transactions as LONG based on expiration
// TODO: REMOVE WITH txnBackwardsCompatCheck()
private final long longTimeoutTolerance;
public TransactionManager(Configuration config) {
this(config, new NoOpTransactionStateStorage(new SnapshotCodecProvider(config)), new DefaultMetricsCollector());
}
@Inject
public TransactionManager(Configuration conf, @Nonnull TransactionStateStorage persistor,
MetricsCollector txMetricsCollector) {
this.persistor = persistor;
cleanupInterval = conf.getInt(TxConstants.Manager.CFG_TX_CLEANUP_INTERVAL,
TxConstants.Manager.DEFAULT_TX_CLEANUP_INTERVAL);
maxTimeout = conf.getInt(TxConstants.Manager.CFG_TX_MAX_TIMEOUT,
TxConstants.Manager.DEFAULT_TX_MAX_TIMEOUT);
defaultTimeout = conf.getInt(TxConstants.Manager.CFG_TX_TIMEOUT,
TxConstants.Manager.DEFAULT_TX_TIMEOUT);
defaultLongTimeout = conf.getInt(TxConstants.Manager.CFG_TX_LONG_TIMEOUT,
TxConstants.Manager.DEFAULT_TX_LONG_TIMEOUT);
snapshotFrequencyInSeconds = conf.getLong(TxConstants.Manager.CFG_TX_SNAPSHOT_INTERVAL,
TxConstants.Manager.DEFAULT_TX_SNAPSHOT_INTERVAL);
// must always keep at least 1 snapshot
snapshotRetainCount = Math.max(conf.getInt(TxConstants.Manager.CFG_TX_SNAPSHOT_RETAIN,
TxConstants.Manager.DEFAULT_TX_SNAPSHOT_RETAIN), 1);
changeSetCountLimit = conf.getInt(TxConstants.Manager.CFG_TX_CHANGESET_COUNT_LIMIT,
TxConstants.Manager.DEFAULT_TX_CHANGESET_COUNT_LIMIT);
changeSetCountThreshold = conf.getInt(TxConstants.Manager.CFG_TX_CHANGESET_COUNT_WARN_THRESHOLD,
TxConstants.Manager.DEFAULT_TX_CHANGESET_COUNT_WARN_THRESHOLD);
changeSetSizeLimit = conf.getLong(TxConstants.Manager.CFG_TX_CHANGESET_SIZE_LIMIT,
TxConstants.Manager.DEFAULT_TX_CHANGESET_SIZE_LIMIT);
changeSetSizeThreshold = conf.getLong(TxConstants.Manager.CFG_TX_CHANGESET_SIZE_WARN_THRESHOLD,
TxConstants.Manager.DEFAULT_TX_CHANGESET_SIZE_WARN_THRESHOLD);
// intentionally not using a constant, as this config should not be exposed
// TODO: REMOVE WITH txnBackwardsCompatCheck()
longTimeoutTolerance = conf.getLong("data.tx.long.timeout.tolerance", 10000);
ClientIdRetention retention = ClientIdRetention.valueOf(
conf.get(TxConstants.Manager.CFG_TX_RETAIN_CLIENT_ID,
TxConstants.Manager.DEFAULT_TX_RETAIN_CLIENT_ID).toUpperCase());
this.retainClientId = retention != ClientIdRetention.OFF;
this.retainClientIdPastCommit = retention == ClientIdRetention.COMMITTED;
this.txMetricsCollector = txMetricsCollector;
this.txMetricsCollector.configure(conf);
clear();
}
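/**
 * Controls how long the client id of a transaction is retained: not at all (OFF), only while the transaction
 * is in progress (ACTIVE), or also after it has entered the committing state (COMMITTED).
 */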
enum ClientIdRetention { OFF, ACTIVE, COMMITTED }
private void clear() {
invalidTxList.clear();
inProgress.clear();
committedChangeSets.clear();
committingChangeSets.clear();
lastWritePointer = 0;
readPointer = 0;
lastSnapshotTime = 0;
}
private boolean isStopping() {
return State.STOPPING.equals(state());
}
@Override
public synchronized void doStart() {
LOG.info("Starting transaction manager.");
txMetricsCollector.start();
// start up the persistor
persistor.startAndWait();
try {
persistor.setupStorage();
} catch (IOException e) {
Throwables.propagate(e);
}
// establish defaults in case there is no persistence
clear();
// attempt to recover state from last run
recoverState();
// start the periodic cleanup thread
startCleanupThread();
startSnapshotThread();
startMetricsThread();
// initialize the WAL if we did not force a snapshot in recoverState()
initLog();
// initialize next write pointer if needed
if (lastWritePointer == 0) {
lastWritePointer = getNextWritePointer();
readPointer = lastWritePointer;
}
notifyStarted();
}
private void initLog() {
if (currentLog == null) {
try {
currentLog = persistor.createLog(System.currentTimeMillis());
} catch (IOException ioe) {
throw Throwables.propagate(ioe);
}
}
}
private void startCleanupThread() {
if (cleanupInterval <= 0 || defaultTimeout <= 0) {
return;
}
LOG.info("Starting periodic timed-out transaction cleanup every " + cleanupInterval +
" seconds with default timeout of " + defaultTimeout + " seconds.");
this.cleanupThread = new DaemonThreadExecutor("tx-clean-timeout") {
@Override
public void doRun() {
cleanupTimedOutTransactions();
}
@Override
public long getSleepMillis() {
return cleanupInterval * 1000;
}
};
cleanupThread.start();
}
private void startSnapshotThread() {
if (snapshotFrequencyInSeconds > 0) {
LOG.info("Starting periodic snapshot thread, frequency = " + snapshotFrequencyInSeconds +
" seconds, location = " + persistor.getLocation());
this.snapshotThread = new DaemonThreadExecutor("tx-snapshot") {
@Override
public void doRun() {
long currentTime = System.currentTimeMillis();
if (lastSnapshotTime < (currentTime - snapshotFrequencyInSeconds * 1000)) {
try {
doSnapshot(false);
} catch (IOException ioe) {
LOG.error("Periodic snapshot failed!", ioe);
}
}
}
@Override
protected void onShutdown() {
// perform a final snapshot
try {
LOG.info("Writing final snapshot prior to shutdown");
doSnapshot(true);
} catch (IOException ioe) {
LOG.error("Failed performing final snapshot on shutdown", ioe);
}
}
@Override
public long getSleepMillis() {
return SNAPSHOT_POLL_INTERVAL;
}
};
snapshotThread.start();
}
}
// Emits the sizes of the transaction data structures as metrics
private void startMetricsThread() {
LOG.info("Starting periodic Metrics Emitter thread, frequency = " + METRICS_POLL_INTERVAL);
this.metricsThread = new DaemonThreadExecutor("tx-metrics") {
@Override
public void doRun() {
txMetricsCollector.gauge("committing.size", committingChangeSets.size());
txMetricsCollector.gauge("committed.size", committedChangeSets.size());
txMetricsCollector.gauge("inprogress.size", inProgress.size());
txMetricsCollector.gauge("invalid.size", getInvalidSize());
}
@Override
protected void onShutdown() {
// perform a final metrics emit
txMetricsCollector.gauge("committing.size", committingChangeSets.size());
txMetricsCollector.gauge("committed.size", committedChangeSets.size());
txMetricsCollector.gauge("inprogress.size", inProgress.size());
txMetricsCollector.gauge("invalid.size", getInvalidSize());
}
@Override
public long getSleepMillis() {
return METRICS_POLL_INTERVAL;
}
};
metricsThread.start();
}
private void cleanupTimedOutTransactions() {
List<TransactionEdit> invalidEdits = null;
logReadLock.lock();
try {
synchronized (this) {
if (!isRunning()) {
return;
}
long currentTime = System.currentTimeMillis();
Map<Long, InProgressType> timedOut = Maps.newHashMap();
for (Map.Entry<Long, InProgressTx> tx : inProgress.entrySet()) {
InProgressTx inProgressTx = tx.getValue();
long expiration = inProgressTx.getExpiration();
if (expiration >= 0L && currentTime > expiration) {
// timed out, remember tx id (can't remove while iterating over entries)
timedOut.put(tx.getKey(), inProgressTx.getType());
LOG.info("Tx invalid list: added tx {} belonging to client '{}' because of timeout.",
tx.getKey(), inProgressTx.getClientId());
} else if (expiration < 0) {
LOG.warn("Transaction {} has negative expiration time {}. Likely cause is the transaction was not " +
"migrated correctly, this transaction will be expired immediately", tx.getKey(), expiration);
timedOut.put(tx.getKey(), InProgressType.LONG);
}
}
if (!timedOut.isEmpty()) {
invalidEdits = Lists.newArrayListWithCapacity(timedOut.size());
invalidTxList.addAll(timedOut.keySet());
for (Map.Entry<Long, InProgressType> tx : timedOut.entrySet()) {
inProgress.remove(tx.getKey());
// checkpoints never go into the committing change sets or the edits
if (!InProgressType.CHECKPOINT.equals(tx.getValue())) {
committingChangeSets.remove(tx.getKey());
invalidEdits.add(TransactionEdit.createInvalid(tx.getKey()));
}
}
LOG.info("Invalidated {} transactions due to timeout.", timedOut.size());
}
}
if (invalidEdits != null) {
appendToLog(invalidEdits);
}
} finally {
this.logReadLock.unlock();
}
}
public synchronized TransactionSnapshot getSnapshot() throws IOException {
TransactionSnapshot snapshot;
if (!isRunning() && !isStopping()) {
return null;
}
long now = System.currentTimeMillis();
// avoid duplicate snapshots at same timestamp
if (now == lastSnapshotTime || (currentLog != null && now == currentLog.getTimestamp())) {
try {
TimeUnit.MILLISECONDS.sleep(1);
} catch (InterruptedException ie) { }
}
// copy in memory state
snapshot = getCurrentState();
LOG.debug("Starting snapshot of transaction state with timestamp {}", snapshot.getTimestamp());
LOG.debug("Returning snapshot of state: " + snapshot);
return snapshot;
}
/**
* Take a snapshot of the transaction state and serialize it into the given output stream.
* @return whether a snapshot was taken.
*/
public boolean takeSnapshot(OutputStream out) throws IOException {
TransactionSnapshot snapshot = getSnapshot();
if (snapshot != null) {
persistor.writeSnapshot(out, snapshot);
return true;
} else {
return false;
}
}
private void doSnapshot(boolean closing) throws IOException {
long snapshotTime = 0L;
TransactionSnapshot snapshot;
TransactionLog oldLog;
try {
this.logWriteLock.lock();
try {
synchronized (this) {
snapshot = getSnapshot();
if (snapshot == null && !closing) {
return;
}
if (snapshot != null) {
snapshotTime = snapshot.getTimestamp();
}
// roll WAL
oldLog = currentLog;
if (!closing) {
currentLog = persistor.createLog(snapshot.getTimestamp());
}
}
// there may not be an existing log on startup
if (oldLog != null) {
oldLog.close();
}
} finally {
this.logWriteLock.unlock();
}
// save snapshot
if (snapshot != null) {
persistor.writeSnapshot(snapshot);
lastSnapshotTime = snapshotTime;
// clean any obsoleted snapshots and WALs
long oldestRetainedTimestamp = persistor.deleteOldSnapshots(snapshotRetainCount);
persistor.deleteLogsOlderThan(oldestRetainedTimestamp);
}
} catch (IOException ioe) {
abortService("Snapshot (timestamp " + snapshotTime + ") failed due to: " + ioe.getMessage(), ioe);
}
}
public synchronized TransactionSnapshot getCurrentState() {
return TransactionSnapshot.copyFrom(System.currentTimeMillis(), readPointer, lastWritePointer,
invalidTxList, inProgress, committingChangeSets,
committedChangeSets);
}
public synchronized void recoverState() {
try {
TransactionSnapshot lastSnapshot = persistor.getLatestSnapshot();
// if we failed before a snapshot could complete, we might not have one to restore
if (lastSnapshot != null) {
restoreSnapshot(lastSnapshot);
}
// replay any WALs since the last snapshot
Collection<TransactionLog> logs = persistor.getLogsSince(lastSnapshotTime);
if (logs != null) {
replayLogs(logs);
}
} catch (IOException e) {
LOG.error("Unable to read back transaction state:", e);
throw Throwables.propagate(e);
}
}
/**
* Restore the initial in-memory transaction state from a snapshot.
*/
private void restoreSnapshot(TransactionSnapshot snapshot) {
LOG.info("Restoring transaction state from snapshot at " + snapshot.getTimestamp());
Preconditions.checkState(lastSnapshotTime == 0, "lastSnapshotTime has been set!");
Preconditions.checkState(readPointer == 0, "readPointer has been set!");
Preconditions.checkState(lastWritePointer == 0, "lastWritePointer has been set!");
Preconditions.checkState(invalidTxList.isEmpty(), "invalid list should be empty!");
Preconditions.checkState(inProgress.isEmpty(), "inProgress map should be empty!");
Preconditions.checkState(committingChangeSets.isEmpty(), "committingChangeSets should be empty!");
Preconditions.checkState(committedChangeSets.isEmpty(), "committedChangeSets should be empty!");
LOG.info("Restoring snapshot of state: " + snapshot);
lastSnapshotTime = snapshot.getTimestamp();
readPointer = snapshot.getReadPointer();
lastWritePointer = snapshot.getWritePointer();
invalidTxList.addAll(snapshot.getInvalid());
inProgress.putAll(txnBackwardsCompatCheck(defaultLongTimeout, longTimeoutTolerance, snapshot.getInProgress()));
for (Map.Entry<Long, Set<ChangeId>> entry : snapshot.getCommittingChangeSets().entrySet()) {
committingChangeSets.put(entry.getKey(), new ChangeSet(null, entry.getValue()));
}
for (Map.Entry<Long, Set<ChangeId>> entry : snapshot.getCommittedChangeSets().entrySet()) {
committedChangeSets.put(entry.getKey(), new ChangeSet(null, entry.getValue()));
}
}
/**
* Check if in-progress transactions need to be migrated to have expiration time and type, if so do the migration.
* This is required for backwards compatibility, when long running transactions were represented
* with expiration time -1. This can be removed when we stop supporting SnapshotCodec version 1.
*/
public static Map<Long, InProgressTx> txnBackwardsCompatCheck(int defaultLongTimeout, long longTimeoutTolerance,
Map<Long, InProgressTx> inProgress) {
for (Map.Entry<Long, InProgressTx> entry : inProgress.entrySet()) {
long writePointer = entry.getKey();
long expiration = entry.getValue().getExpiration();
// LONG transactions will either have a negative expiration or expiration set to the long timeout
// use a fudge factor on the expiration check, since expiration is set based on system time, not the write pointer
if (entry.getValue().getType() == null &&
(expiration < 0 ||
(getTxExpirationFromWritePointer(writePointer, defaultLongTimeout) - expiration
< longTimeoutTolerance))) {
// handle null expiration
long newExpiration = getTxExpirationFromWritePointer(writePointer, defaultLongTimeout);
InProgressTx compatTx =
new InProgressTx(entry.getValue().getVisibilityUpperBound(), newExpiration, InProgressType.LONG,
entry.getValue().getCheckpointWritePointers());
entry.setValue(compatTx);
} else if (entry.getValue().getType() == null) {
InProgressTx compatTx =
new InProgressTx(entry.getValue().getVisibilityUpperBound(), entry.getValue().getExpiration(),
InProgressType.SHORT, entry.getValue().getCheckpointWritePointers());
entry.setValue(compatTx);
}
}
return inProgress;
}
/**
* Resets the state of the transaction manager.
*/
public void resetState() {
this.logWriteLock.lock();
try {
// Take a snapshot before resetting the state, for debugging purposes
doSnapshot(false);
// Clear the state
clear();
// Take another snapshot: if no snapshot is taken after clearing the state
// and the manager is restarted, we will recover from the snapshot taken
// before resetting the state, which would be really bad
// This call will also init a new WAL
doSnapshot(false);
} catch (IOException e) {
LOG.error("Snapshot failed when resetting state!", e);
e.printStackTrace();
} finally {
this.logWriteLock.unlock();
}
}
/**
* Replay all logged edits from the given transaction logs.
*/
private void replayLogs(Collection<TransactionLog> logs) {
for (TransactionLog log : logs) {
LOG.info("Replaying edits from transaction log " + log.getName());
int editCnt = 0;
try {
TransactionLogReader reader = log.getReader();
// reader may be null in the case of an empty file
if (reader == null) {
continue;
}
TransactionEdit edit;
while ((edit = reader.next()) != null) {
editCnt++;
switch (edit.getState()) {
case INPROGRESS:
long expiration = edit.getExpiration();
TransactionType type = edit.getType();
// Check if transaction needs to be migrated to have expiration and type. Previous version of
// long running transactions were represented with expiration time as -1.
// This can be removed when we stop supporting TransactionEditCodecV2.
if (expiration < 0) {
expiration = getTxExpirationFromWritePointer(edit.getWritePointer(), defaultLongTimeout);
type = TransactionType.LONG;
} else if (type == null) {
type = TransactionType.SHORT;
}
// We don't persist the client id.
addInProgressAndAdvance(edit.getWritePointer(), edit.getVisibilityUpperBound(), expiration, type, null);
break;
case COMMITTING:
addCommittingChangeSet(edit.getWritePointer(), null, edit.getChanges());
break;
case COMMITTED:
// TODO: need to reconcile usage of transaction id v/s write pointer TEPHRA-140
long transactionId = edit.getWritePointer();
long[] checkpointPointers = edit.getCheckpointPointers();
long writePointer = checkpointPointers == null || checkpointPointers.length == 0 ?
transactionId : checkpointPointers[checkpointPointers.length - 1];
doCommit(transactionId, writePointer, new ChangeSet(null, edit.getChanges()),
edit.getCommitPointer(), edit.getCanCommit());
break;
case INVALID:
doInvalidate(edit.getWritePointer());
break;
case ABORTED:
type = edit.getType();
// Check if transaction edit needs to be migrated to have type. Previous versions of
// ABORTED edits did not contain type.
// This can be removed when we stop supporting TransactionEditCodecV2.
if (type == null) {
InProgressTx inProgressTx = inProgress.get(edit.getWritePointer());
if (inProgressTx != null) {
InProgressType inProgressType = inProgressTx.getType();
if (InProgressType.CHECKPOINT.equals(inProgressType)) {
// this should never happen, because checkpoints never go into the log edits;
LOG.debug("Ignoring ABORTED edit for a checkpoint transaction {}", edit.getWritePointer());
break;
}
if (inProgressType != null) {
type = inProgressType.getTransactionType();
}
} else {
// If transaction is not in-progress, then it has either been already aborted or invalidated.
// We cannot determine the transaction's state based on current information; to be safe, invalidate it.
LOG.warn("Invalidating transaction {} as its type cannot be determined during replay",
edit.getWritePointer());
doInvalidate(edit.getWritePointer());
break;
}
}
doAbort(edit.getWritePointer(), edit.getCheckpointPointers(), type);
break;
case TRUNCATE_INVALID_TX:
if (edit.getTruncateInvalidTxTime() != 0) {
doTruncateInvalidTxBefore(edit.getTruncateInvalidTxTime());
} else {
doTruncateInvalidTx(edit.getTruncateInvalidTx());
}
break;
case CHECKPOINT:
doCheckpoint(edit.getWritePointer(), edit.getParentWritePointer());
break;
default:
// unknown type!
throw new IllegalArgumentException("Invalid state for WAL entry: " + edit.getState());
}
}
} catch (IOException | InvalidTruncateTimeException e) {
throw Throwables.propagate(e);
}
LOG.info("Read " + editCnt + " edits from log " + log.getName());
}
}
@Override
public void doStop() {
Stopwatch timer = new Stopwatch().start();
LOG.info("Shutting down gracefully...");
// signal the cleanup thread to stop
if (cleanupThread != null) {
cleanupThread.shutdown();
try {
cleanupThread.join(30000L);
} catch (InterruptedException ie) {
LOG.warn("Interrupted waiting for cleanup thread to stop");
Thread.currentThread().interrupt();
}
}
if (metricsThread != null) {
metricsThread.shutdown();
try {
metricsThread.join(30000L);
} catch (InterruptedException ie) {
LOG.warn("Interrupted waiting for cleanup thread to stop");
Thread.currentThread().interrupt();
}
}
if (snapshotThread != null) {
// this will trigger a final snapshot on stop
snapshotThread.shutdown();
try {
snapshotThread.join(30000L);
} catch (InterruptedException ie) {
LOG.warn("Interrupted waiting for snapshot thread to stop");
Thread.currentThread().interrupt();
}
}
persistor.stopAndWait();
txMetricsCollector.stop();
timer.stop();
LOG.info("Took " + timer + " to stop");
notifyStopped();
}
/**
* Immediately shuts down the service, without going through the normal close process.
* @param message A message describing the source of the failure.
* @param error Any exception that caused the failure.
*/
private void abortService(String message, Throwable error) {
if (isRunning()) {
LOG.error("Aborting transaction manager due to: " + message, error);
notifyFailed(error);
}
}
private void ensureAvailable() {
Preconditions.checkState(isRunning(), "Transaction Manager is not running.");
}
/**
* Start a short transaction with the default timeout.
*/
public Transaction startShort() {
return startShort(defaultTimeout);
}
/**
* Start a short transaction with a client id and default timeout.
* @param clientId id of the client requesting a transaction.
*/
public Transaction startShort(String clientId) {
return startShort(clientId, defaultTimeout);
}
/**
* Start a short transaction with a given timeout.
* @param timeoutInSeconds the time out period in seconds.
*/
public Transaction startShort(int timeoutInSeconds) {
return startShort(DEFAULT_CLIENTID, timeoutInSeconds);
}
/**
* Start a short transaction with a given timeout.
* @param clientId id of the client requesting a transaction.
* @param timeoutInSeconds the time out period in seconds.
*/
public Transaction startShort(String clientId, int timeoutInSeconds) {
Preconditions.checkArgument(clientId != null, "clientId must not be null");
Preconditions.checkArgument(timeoutInSeconds > 0,
"timeout must be positive but is %s seconds", timeoutInSeconds);
Preconditions.checkArgument(timeoutInSeconds <= maxTimeout,
"timeout must not exceed %s seconds but is %s seconds", maxTimeout, timeoutInSeconds);
txMetricsCollector.rate("start.short");
Stopwatch timer = new Stopwatch().start();
long expiration = getTxExpiration(timeoutInSeconds);
Transaction tx = startTx(expiration, TransactionType.SHORT, clientId);
txMetricsCollector.histogram("start.short.latency", (int) timer.elapsedMillis());
return tx;
}
private static long getTxExpiration(long timeoutInSeconds) {
long currentTime = System.currentTimeMillis();
return currentTime + TimeUnit.SECONDS.toMillis(timeoutInSeconds);
}
public static long getTxExpirationFromWritePointer(long writePointer, long timeoutInSeconds) {
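// transaction ids are assigned as at least System.currentTimeMillis() * MAX_TX_PER_MS (see getNextWritePointer),
// so dividing by MAX_TX_PER_MS recovers the approximate creation time in milliseconds; adding the timeout
// yields the expiration timestamp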
return writePointer / TxConstants.MAX_TX_PER_MS + TimeUnit.SECONDS.toMillis(timeoutInSeconds);
}
private long getNextWritePointer() {
// We want to align tx ids with current time. We assume that tx ids are sequential, but not less than
// System.currentTimeMillis() * MAX_TX_PER_MS.
return Math.max(lastWritePointer + 1, System.currentTimeMillis() * TxConstants.MAX_TX_PER_MS);
}
/**
* Start a long transaction. Long transactions do not participate in conflict detection. Also, aborting a long
* transaction moves it to the invalid list because we assume that its writes cannot be rolled back.
*/
public Transaction startLong() {
return startLong(DEFAULT_CLIENTID);
}
/**
* Starts a long transaction with a client id.
*/
public Transaction startLong(String clientId) {
Preconditions.checkArgument(clientId != null, "clientId must not be null");
txMetricsCollector.rate("start.long");
Stopwatch timer = new Stopwatch().start();
long expiration = getTxExpiration(defaultLongTimeout);
Transaction tx = startTx(expiration, TransactionType.LONG, clientId);
txMetricsCollector.histogram("start.long.latency", (int) timer.elapsedMillis());
return tx;
}
private Transaction startTx(long expiration, TransactionType type, @Nullable String clientId) {
Transaction tx;
long txid;
// guard against changes to the transaction log while processing
this.logReadLock.lock();
try {
synchronized (this) {
ensureAvailable();
txid = getNextWritePointer();
tx = createTransaction(txid, type);
addInProgressAndAdvance(tx.getTransactionId(), tx.getVisibilityUpperBound(), expiration, type,
retainClientId ? clientId : null);
}
// appending to WAL out of global lock for concurrent performance
// we should still be able to arrive at the same state even if log entries are out of order
appendToLog(TransactionEdit.createStarted(tx.getTransactionId(), tx.getVisibilityUpperBound(), expiration, type));
} finally {
this.logReadLock.unlock();
}
return tx;
}
private void addInProgressAndAdvance(long writePointer, long visibilityUpperBound,
long expiration, TransactionType type, @Nullable String clientId) {
addInProgressAndAdvance(writePointer, visibilityUpperBound, expiration, InProgressType.of(type), clientId);
}
private void addInProgressAndAdvance(long writePointer, long visibilityUpperBound,
long expiration, InProgressType type, @Nullable String clientId) {
inProgress.put(writePointer, new InProgressTx(clientId, visibilityUpperBound, expiration, type));
advanceWritePointer(writePointer);
}
private void advanceWritePointer(long writePointer) {
// don't move the write pointer back if we have out of order transaction log entries
if (writePointer > lastWritePointer) {
lastWritePointer = writePointer;
}
}
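/**
 * Checks whether the given set of changes can be committed by the given transaction, i.e. whether none of the
 * changes conflicts with a change committed by another transaction since this transaction started. If the check
 * passes, the change set is remembered as committing and a corresponding edit is appended to the transaction log.
 *
 * @param txId the id of the transaction
 * @param changeIds the keys of all changes made by the transaction
 * @throws TransactionNotInProgressException if the transaction is not in progress (for example, it timed out)
 * @throws TransactionSizeException if the change set exceeds the configured count or size limit
 * @throws TransactionConflictException if a conflicting change is detected
 */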
public void canCommit(long txId, Collection<byte[]> changeIds)
throws TransactionNotInProgressException, TransactionSizeException, TransactionConflictException {
txMetricsCollector.rate("canCommit");
Stopwatch timer = new Stopwatch().start();
InProgressTx inProgressTx = inProgress.get(txId);
if (inProgressTx == null) {
synchronized (this) {
// invalid transaction, either this has timed out and moved to invalid, or something else is wrong.
if (invalidTxList.contains(txId)) {
throw new TransactionNotInProgressException(
String.format(
"canCommit() is called for transaction %d that is not in progress (it is known to be invalid)", txId));
} else {
throw new TransactionNotInProgressException(
String.format("canCommit() is called for transaction %d that is not in progress", txId));
}
}
}
Set<ChangeId> set =
validateChangeSet(txId, changeIds, inProgressTx.clientId != null ? inProgressTx.clientId : DEFAULT_CLIENTID);
checkForConflicts(txId, set);
// guard against changes to the transaction log while processing
this.logReadLock.lock();
try {
synchronized (this) {
ensureAvailable();
addCommittingChangeSet(txId, inProgressTx.getClientId(), set);
}
appendToLog(TransactionEdit.createCommitting(txId, set));
} finally {
this.logReadLock.unlock();
}
txMetricsCollector.histogram("canCommit.latency", (int) timer.elapsedMillis());
}
/**
* Validate the number of changes and the total size of changes. Log a warning if either of them exceeds the
* configured threshold, or log a warning and throw an exception if it exceeds the configured limit.
*
* We log here because application developers may ignore warnings. Logging here gives us a single point
* (the tx manager log) to identify all clients that send excessively large change sets.
*
* @return the same set of changes, transformed into a set of {@link ChangeId}s.
* @throws TransactionSizeException if the number or total size of the changes exceed the limit.
*/
private Set<ChangeId> validateChangeSet(long txId, Collection<byte[]> changeIds,
String clientId) throws TransactionSizeException {
if (changeIds.size() > changeSetCountLimit) {
LOG.warn("Change set for transaction {} belonging to client '{}' has {} entries and exceeds " +
"the allowed size of {}. Limit the number of changes, or use a long-running transaction. ",
txId, clientId, changeIds.size(), changeSetCountLimit);
throw new TransactionSizeException(String.format(
"Change set for transaction %d has %d entries and exceeds the limit of %d",
txId, changeIds.size(), changeSetCountLimit));
} else if (changeIds.size() > changeSetCountThreshold) {
LOG.warn("Change set for transaction {} belonging to client '{}' has {} entries. " +
"It is recommended to limit the number of changes to {}, or to use a long-running transaction. ",
txId, clientId, changeIds.size(), changeSetCountThreshold);
}
long byteCount = 0L;
Set<ChangeId> set = Sets.newHashSetWithExpectedSize(changeIds.size());
for (byte[] change : changeIds) {
set.add(new ChangeId(change));
byteCount += change.length;
}
if (byteCount > changeSetSizeLimit) {
LOG.warn("Change set for transaction {} belonging to client '{}' has total size of {} bytes and exceeds " +
"the allowed size of {} bytes. Limit the total size of changes, or use a long-running transaction. ",
txId, clientId, byteCount, changeSetSizeLimit);
throw new TransactionSizeException(String.format(
"Change set for transaction %d has total size of %d bytes and exceeds the limit of %d bytes",
txId, byteCount, changeSetSizeLimit));
} else if (byteCount > changeSetSizeThreshold) {
LOG.warn("Change set for transaction {} belonging to client '{}' has total size of {} bytes. " +
"It is recommended to limit the total size to {} bytes, or to use a long-running transaction. ",
txId, clientId, byteCount, changeSetSizeThreshold);
}
return set;
}
private void addCommittingChangeSet(long writePointer, String clientId, Set<ChangeId> changes) {
committingChangeSets.put(writePointer, new ChangeSet(retainClientIdPastCommit ? clientId : null, changes));
}
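/**
 * Commits the given transaction. Conflict detection is repeated here, because other transactions may have
 * committed since the canCommit() check. On success the transaction is removed from the in-progress set, its
 * change set is recorded under the commit pointer, and the read pointer is advanced if needed.
 *
 * @param txId the id of the transaction
 * @param writePointer the current write pointer of the transaction
 * @throws TransactionNotInProgressException if the transaction is not in progress (for example, it timed out)
 * @throws TransactionConflictException if a conflicting change is detected
 */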
public void commit(long txId, long writePointer)
throws TransactionNotInProgressException, TransactionConflictException {
txMetricsCollector.rate("commit");
Stopwatch timer = new Stopwatch().start();
ChangeSet changeSet;
boolean addToCommitted = true;
long commitPointer;
// guard against changes to the transaction log while processing
this.logReadLock.lock();
try {
synchronized (this) {
ensureAvailable();
// we record commits at the first not-yet assigned transaction id to simplify clearing out change sets that
// are no longer visible by any in-progress transactions
commitPointer = lastWritePointer + 1;
if (inProgress.get(txId) == null) {
// invalid transaction, either this has timed out and moved to invalid, or something else is wrong.
if (invalidTxList.contains(txId)) {
throw new TransactionNotInProgressException(
String.format("canCommit() is called for transaction %d that is not in progress " +
"(it is known to be invalid)", txId));
} else {
throw new TransactionNotInProgressException(
String.format("canCommit() is called for transaction %d that is not in progress", txId));
}
}
// these should be atomic
// NOTE: whether we succeed or not we don't need to keep changes in committing state: same tx cannot
// be attempted to commit twice
changeSet = committingChangeSets.remove(txId);
if (changeSet != null) {
// double-checking if there are conflicts: someone may have committed since canCommit check
checkForConflicts(txId, changeSet.getChangeIds());
} else {
// no changes
addToCommitted = false;
}
doCommit(txId, writePointer, changeSet, commitPointer, addToCommitted);
}
appendToLog(TransactionEdit.createCommitted(txId, changeSet == null ? null : changeSet.getChangeIds(),
commitPointer, addToCommitted));
} finally {
this.logReadLock.unlock();
}
txMetricsCollector.histogram("commit.latency", (int) timer.elapsedMillis());
}
private void doCommit(long transactionId, long writePointer, ChangeSet changes, long commitPointer,
boolean addToCommitted) {
// In case this method is called when loading a previous WAL, we need to remove the tx from these sets
committingChangeSets.remove(transactionId);
if (addToCommitted && !changes.getChangeIds().isEmpty()) {
// No need to add empty changes to the committed change sets, they will never trigger any conflict
// Record the committed change set with the next writePointer as the commit time.
// NOTE: we use current next writePointer as key for the map, hence we may have multiple txs changesets to be
// stored under one key
ChangeSet committed = committedChangeSets.get(commitPointer);
if (committed != null) {
// NOTE: we modify the new set to prevent concurrent modification exception, as other threads (e.g. in
// canCommit) use it unguarded
changes.getChangeIds().addAll(committed.getChangeIds());
}
committedChangeSets.put(commitPointer, changes);
}
// remove from in-progress set, so that it does not get excluded in the future
InProgressTx previous = inProgress.remove(transactionId);
if (previous == null) {
// tx was not in progress! perhaps it timed out and is invalid? try to remove it there.
if (invalidTxList.remove(transactionId)) {
LOG.info("Tx invalid list: removed committed tx {}", transactionId);
}
} else {
LongArrayList checkpointPointers = previous.getCheckpointWritePointers();
if (!checkpointPointers.isEmpty()) {
// adjust the write pointer to be the last checkpoint of the tx and remove all checkpoints from inProgress
writePointer = checkpointPointers.getLong(checkpointPointers.size() - 1);
inProgress.keySet().removeAll(previous.getCheckpointWritePointers());
}
}
// moving read pointer
moveReadPointerIfNeeded(writePointer);
// All committed change sets that are smaller than the earliest started transaction can be removed.
// here we ignore transactions that have no timeout, they are long-running and don't participate in
// conflict detection.
// TODO: for efficiency, can we do this once per-log in replayLogs instead of once per edit?
committedChangeSets.headMap(TxUtils.getFirstShortInProgress(inProgress)).clear();
}
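/**
 * Aborts the given transaction. A short transaction is simply removed from the in-progress set, assuming its
 * changes have been rolled back by the client; a long transaction is moved to the invalid list instead, because
 * its changes cannot be rolled back.
 */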
public void abort(Transaction tx) {
// guard against changes to the transaction log while processing
txMetricsCollector.rate("abort");
Stopwatch timer = new Stopwatch().start();
this.logReadLock.lock();
try {
synchronized (this) {
ensureAvailable();
doAbort(tx.getTransactionId(), tx.getCheckpointWritePointers(), tx.getType());
}
appendToLog(TransactionEdit.createAborted(tx.getTransactionId(), tx.getType(), tx.getCheckpointWritePointers()));
txMetricsCollector.histogram("abort.latency", (int) timer.elapsedMillis());
} finally {
this.logReadLock.unlock();
}
}
private void doAbort(long writePointer, long[] checkpointWritePointers, TransactionType type) {
committingChangeSets.remove(writePointer);
if (type == TransactionType.LONG) {
// Long running transactions cannot be aborted as their change sets are not saved,
// and hence the changes cannot be rolled back. Invalidate the long running transaction instead.
doInvalidate(writePointer);
return;
}
// makes tx visible (assumes that all operations were rolled back)
// remove from in-progress set, so that it does not get excluded in the future
InProgressTx removed = inProgress.remove(writePointer);
boolean removeInProgressCheckpoints = true;
if (removed == null) {
// tx was not in progress! perhaps it timed out and is invalid? try to remove it there.
if (invalidTxList.remove(writePointer)) {
// the tx and all its children were invalidated: no need to remove them from inProgress
removeInProgressCheckpoints = false;
// remove any invalidated checkpoint pointers
// this will only be present if the parent write pointer was also invalidated
if (checkpointWritePointers != null) {
for (long checkpointWritePointer : checkpointWritePointers) {
invalidTxList.remove(checkpointWritePointer);
}
}
LOG.info("Tx invalid list: removed aborted tx {}", writePointer);
}
}
if (removeInProgressCheckpoints && checkpointWritePointers != null) {
for (long checkpointWritePointer : checkpointWritePointers) {
inProgress.remove(checkpointWritePointer);
}
}
// removed a tx from excludes: must move read pointer
moveReadPointerIfNeeded(writePointer);
}
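/**
 * Invalidates the given transaction: it is removed from the in-progress and committing state and added to the
 * invalid list, so that its writes remain excluded from all future reads.
 *
 * @param tx the id of the transaction to invalidate
 * @return true if the transaction was moved to the invalid list, false if it was neither in progress nor committing
 */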
public boolean invalidate(long tx) {
// guard against changes to the transaction log while processing
txMetricsCollector.rate("invalidate");
Stopwatch timer = new Stopwatch().start();
this.logReadLock.lock();
try {
boolean success;
synchronized (this) {
ensureAvailable();
success = doInvalidate(tx);
}
appendToLog(TransactionEdit.createInvalid(tx));
txMetricsCollector.histogram("invalidate.latency", (int) timer.elapsedMillis());
return success;
} finally {
this.logReadLock.unlock();
}
}
private boolean doInvalidate(long writePointer) {
ChangeSet previousChangeSet = committingChangeSets.remove(writePointer);
// remove from in-progress set, so that it does not get excluded in the future
InProgressTx previous = inProgress.remove(writePointer);
// This check is to prevent from invalidating committed transactions
if (previous != null || previousChangeSet != null) {
// add tx to invalids
invalidTxList.add(writePointer);
if (previous == null) {
LOG.debug("Invalidating tx {} in committing change sets but not in-progress", writePointer);
} else {
// invalidate any checkpoint write pointers
LongArrayList childWritePointers = previous.getCheckpointWritePointers();
if (!childWritePointers.isEmpty()) {
invalidTxList.addAll(childWritePointers);
inProgress.keySet().removeAll(childWritePointers);
}
}
String clientId = DEFAULT_CLIENTID;
if (previous != null && previous.getClientId() != null) {
clientId = previous.getClientId();
}
LOG.info("Tx invalid list: added tx {} belonging to client '{}' because of invalidate", writePointer, clientId);
if (previous != null && !previous.isLongRunning()) {
// tx was short-running: must move read pointer
moveReadPointerIfNeeded(writePointer);
}
return true;
}
return false;
}
/**
* Removes the given transaction ids from the invalid list.
* @param invalidTxIds transaction ids
* @return true if invalid list got changed, false otherwise
*/
public boolean truncateInvalidTx(Set<Long> invalidTxIds) {
// guard against changes to the transaction log while processing
txMetricsCollector.rate("truncateInvalidTx");
Stopwatch timer = new Stopwatch().start();
this.logReadLock.lock();
try {
boolean success;
synchronized (this) {
ensureAvailable();
success = doTruncateInvalidTx(invalidTxIds);
}
appendToLog(TransactionEdit.createTruncateInvalidTx(invalidTxIds));
txMetricsCollector.histogram("truncateInvalidTx.latency", (int) timer.elapsedMillis());
return success;
} finally {
this.logReadLock.unlock();
}
}
private boolean doTruncateInvalidTx(Set<Long> toRemove) {
LOG.info("Removing tx ids {} from invalid list", toRemove);
return invalidTxList.removeAll(toRemove);
}
/**
* Removes all transaction ids started before the given time from invalid list.
* @param time time in milliseconds
* @return true if invalid list got changed, false otherwise
* @throws InvalidTruncateTimeException if there are any in-progress transactions started before given time
*/
public boolean truncateInvalidTxBefore(long time) throws InvalidTruncateTimeException {
// guard against changes to the transaction log while processing
txMetricsCollector.rate("truncateInvalidTxBefore");
Stopwatch timer = new Stopwatch().start();
this.logReadLock.lock();
try {
boolean success;
synchronized (this) {
ensureAvailable();
success = doTruncateInvalidTxBefore(time);
}
appendToLog(TransactionEdit.createTruncateInvalidTxBefore(time));
txMetricsCollector.histogram("truncateInvalidTxBefore.latency", (int) timer.elapsedMillis());
return success;
} finally {
this.logReadLock.unlock();
}
}
private boolean doTruncateInvalidTxBefore(long time) throws InvalidTruncateTimeException {
LOG.info("Removing tx ids before {} from invalid list", time);
long truncateWp = time * TxConstants.MAX_TX_PER_MS;
// Check if there are any in-progress transactions started earlier than the truncate time
if (inProgress.lowerKey(truncateWp) != null) {
throw new InvalidTruncateTimeException("Transactions started earlier than " + time + " are in-progress");
}
// Find all invalid transactions earlier than truncateWp
LongSet toTruncate = new LongArraySet();
LongIterator it = invalidTxList.toRawList().iterator();
while (it.hasNext()) {
long wp = it.nextLong();
if (wp < truncateWp) {
toTruncate.add(wp);
}
}
LOG.info("Removing tx ids {} from invalid list", toTruncate);
return invalidTxList.removeAll(toTruncate);
}
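/**
 * Creates a checkpoint for the given transaction: a new write pointer is assigned to it while its read snapshot
 * is retained. The new write pointer is recorded as an in-progress checkpoint of the parent transaction.
 *
 * @param originalTx the transaction to checkpoint
 * @return a new Transaction with the additional checkpoint write pointer
 * @throws TransactionNotInProgressException if the parent transaction is no longer in progress
 */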
public Transaction checkpoint(Transaction originalTx) throws TransactionNotInProgressException {
txMetricsCollector.rate("checkpoint");
Stopwatch timer = new Stopwatch().start();
Transaction checkpointedTx;
long txId = originalTx.getTransactionId();
long newWritePointer;
// guard against changes to the transaction log while processing
this.logReadLock.lock();
try {
synchronized (this) {
ensureAvailable();
// check that the parent tx is in progress
InProgressTx parentTx = inProgress.get(txId);
if (parentTx == null) {
if (invalidTxList.contains(txId)) {
throw new TransactionNotInProgressException(
String.format("Transaction %d is not in progress because it was invalidated", txId));
} else {
throw new TransactionNotInProgressException(
String.format("Transaction %d is not in progress", txId));
}
}
newWritePointer = getNextWritePointer();
doCheckpoint(newWritePointer, txId);
// create a new transaction with the same read snapshot, plus the additional checkpoint write pointer
// the same read snapshot is maintained so that the checkpointed transaction sees the same data as the original
checkpointedTx = new Transaction(originalTx, newWritePointer,
parentTx.getCheckpointWritePointers().toLongArray());
}
// appending to WAL out of global lock for concurrent performance
// we should still be able to arrive at the same state even if log entries are out of order
appendToLog(TransactionEdit.createCheckpoint(newWritePointer, txId));
} finally {
this.logReadLock.unlock();
}
txMetricsCollector.histogram("checkpoint.latency", (int) timer.elapsedMillis());
return checkpointedTx;
}
private void doCheckpoint(long newWritePointer, long parentWritePointer) {
InProgressTx existingTx = inProgress.get(parentWritePointer);
existingTx.addCheckpointWritePointer(newWritePointer);
addInProgressAndAdvance(newWritePointer, existingTx.getVisibilityUpperBound(), existingTx.getExpiration(),
InProgressType.CHECKPOINT, existingTx.getClientId());
}
// hack for exposing important metric
public int getExcludedListSize() {
return getInvalidSize() + inProgress.size();
}
/**
* @return the size of invalid list
*/
public synchronized int getInvalidSize() {
return this.invalidTxList.size();
}
int getCommittedSize() {
return this.committedChangeSets.size();
}
@Nullable
@VisibleForTesting
InProgressTx getInProgress(long transactionId) {
return inProgress.get(transactionId);
}
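/**
 * Checks the given change set against all change sets committed after the given transaction started, and throws
 * a {@link TransactionConflictException} for the first overlapping change found.
 */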
private void checkForConflicts(long txId, Set<ChangeId> changeIds) throws TransactionConflictException {
if (changeIds.isEmpty()) {
return;
}
for (Map.Entry<Long, ChangeSet> committed : committedChangeSets.entrySet()) {
// If the commit pointer is greater than the transaction id, the changes were committed after the given
// transaction started, so they must be checked for conflicts even though they are not visible to it
if (committed.getKey() > txId) {
ChangeId change = overlap(committed.getValue().getChangeIds(), changeIds);
if (change != null) {
throw new TransactionConflictException(txId, change.toString(), committed.getValue().getClientId());
}
}
}
}
/**
* Checks for overlap in two change sets, returns the first common change it finds, or null if no overlap.
*/
@Nullable
private ChangeId overlap(Set<ChangeId> a, Set<ChangeId> b) {
// iterate over the smaller set, and check for every element in the other set
if (a.size() > b.size()) {
for (ChangeId change : b) {
if (a.contains(change)) {
return change;
}
}
} else {
for (ChangeId change : a) {
if (b.contains(change)) {
return change;
}
}
}
return null;
}
private void moveReadPointerIfNeeded(long committedWritePointer) {
if (committedWritePointer > readPointer) {
readPointer = committedWritePointer;
}
}
/**
* Creates a new Transaction. This method only gets called from start transaction, which is already
* synchronized.
*/
private Transaction createTransaction(long writePointer, TransactionType type) {
// For holding the first in progress short transaction Id (with timeout >= 0).
long firstShortTx = Transaction.NO_TX_IN_PROGRESS;
LongArrayList inProgressIds = new LongArrayList(inProgress.size());
for (Map.Entry<Long, InProgressTx> entry : inProgress.entrySet()) {
long txId = entry.getKey();
inProgressIds.add(txId);
if (firstShortTx == Transaction.NO_TX_IN_PROGRESS && !entry.getValue().isLongRunning()) {
firstShortTx = txId;
}
}
return new Transaction(readPointer, writePointer, invalidTxList.toSortedArray(),
inProgressIds.toLongArray(), firstShortTx, type);
}
private void appendToLog(TransactionEdit edit) {
try {
Stopwatch timer = new Stopwatch().start();
currentLog.append(edit);
txMetricsCollector.rate("wal.append.count");
txMetricsCollector.histogram("wal.append.latency", (int) timer.elapsedMillis());
} catch (IOException ioe) {
abortService("Error appending to transaction log", ioe);
}
}
private void appendToLog(List<TransactionEdit> edits) {
try {
Stopwatch timer = new Stopwatch().start();
currentLog.append(edits);
txMetricsCollector.rate("wal.append.count", edits.size());
txMetricsCollector.histogram("wal.append.latency", (int) timer.elapsedMillis());
} catch (IOException ioe) {
abortService("Error appending to transaction log", ioe);
}
}
/**
* Called from the tx service every 10 seconds.
* This hack is needed because the current metrics system is not flexible when it comes to adding new metrics.
*/
public void logStatistics() {
LOG.info("Transaction Statistics: write pointer = " + lastWritePointer +
", invalid = " + getInvalidSize() +
", in progress = " + inProgress.size() +
", committing = " + committingChangeSets.size() +
", committed = " + committedChangeSets.size());
}
@SuppressWarnings("unused")
@VisibleForTesting
public TransactionStateStorage getTransactionStateStorage() {
return persistor;
}
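/**
 * A daemon thread that repeatedly invokes {@link #doRun()}, waiting {@link #getSleepMillis()} between runs,
 * until {@link #shutdown()} is called or the thread is interrupted. {@link #onShutdown()} runs once before exit.
 */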
private abstract static class DaemonThreadExecutor extends Thread {
private final AtomicBoolean stopped = new AtomicBoolean(false);
DaemonThreadExecutor(String name) {
super(name);
setDaemon(true);
}
public void run() {
try {
while (!isInterrupted() && !stopped.get()) {
doRun();
synchronized (stopped) {
stopped.wait(getSleepMillis());
}
}
} catch (InterruptedException ie) {
LOG.info("Interrupted thread " + getName());
}
// perform any final cleanup
onShutdown();
LOG.info("Exiting thread " + getName());
}
public abstract void doRun();
protected abstract long getSleepMillis();
protected void onShutdown() {
}
public void shutdown() {
if (stopped.compareAndSet(false, true)) {
synchronized (stopped) {
stopped.notifyAll();
}
}
}
}
/**
* Type of in-progress transaction.
*/
public enum InProgressType {
/**
* Short transactions detect conflicts during commit.
*/
SHORT(TransactionType.SHORT),
/**
* Long running transactions do not detect conflicts during commit.
*/
LONG(TransactionType.LONG),
/**
* Check-pointed transactions are recorded as in-progress.
*/
CHECKPOINT(null);
private final TransactionType transactionType;
InProgressType(TransactionType transactionType) {
this.transactionType = transactionType;
}
public static InProgressType of(TransactionType type) {
switch (type) {
case SHORT: return SHORT;
case LONG: return LONG;
default: throw new IllegalArgumentException("Unknown TransactionType " + type);
}
}
@Nullable
public TransactionType getTransactionType() {
return transactionType;
}
}
/**
* Represents some of the info on an in-progress tx.
*/
public static final class InProgressTx {
/** the oldest in progress tx at the time of this tx start */
private final long visibilityUpperBound;
private final long expiration;
private final InProgressType type;
private final LongArrayList checkpointWritePointers;
// clientId is not part of hashCode computation or equality check since it is not persisted. Once it is persisted
// and restored, we can include it in the above.
private final String clientId;
public InProgressTx(String clientId, long visibilityUpperBound, long expiration, InProgressType type) {
this(clientId, visibilityUpperBound, expiration, type, new LongArrayList());
}
public InProgressTx(long visibilityUpperBound, long expiration, InProgressType type) {
this(visibilityUpperBound, expiration, type, new LongArrayList());
}
public InProgressTx(String clientId, long visibilityUpperBound, long expiration, InProgressType type,
LongArrayList checkpointWritePointers) {
this.visibilityUpperBound = visibilityUpperBound;
this.expiration = expiration;
this.type = type;
this.checkpointWritePointers = checkpointWritePointers;
this.clientId = clientId;
}
public InProgressTx(long visibilityUpperBound, long expiration, InProgressType type,
LongArrayList checkpointWritePointers) {
this.visibilityUpperBound = visibilityUpperBound;
this.expiration = expiration;
this.type = type;
this.checkpointWritePointers = checkpointWritePointers;
this.clientId = null;
}
// For backwards compatibility when long running txns were represented with -1 expiration
@Deprecated
public InProgressTx(long visibilityUpperBound, long expiration) {
this(visibilityUpperBound, expiration, null);
}
public long getVisibilityUpperBound() {
return visibilityUpperBound;
}
public long getExpiration() {
return expiration;
}
@Nullable
public InProgressType getType() {
return type;
}
@Nullable
public String getClientId() {
return clientId;
}
public boolean isLongRunning() {
if (type == null) {
// for backwards compatibility when long running txns were represented with -1 expiration
return expiration == -1;
}
return type == InProgressType.LONG;
}
public void addCheckpointWritePointer(long checkpointWritePointer) {
checkpointWritePointers.add(checkpointWritePointer);
}
public LongArrayList getCheckpointWritePointers() {
return checkpointWritePointers;
}
@Override
public boolean equals(Object o) {
if (o == null || !(o instanceof InProgressTx)) {
return false;
}
if (this == o) {
return true;
}
InProgressTx other = (InProgressTx) o;
return Objects.equal(visibilityUpperBound, other.getVisibilityUpperBound()) &&
Objects.equal(expiration, other.getExpiration()) &&
Objects.equal(type, other.type) &&
Objects.equal(checkpointWritePointers, other.checkpointWritePointers);
}
@Override
public int hashCode() {
return Objects.hashCode(visibilityUpperBound, expiration, type, checkpointWritePointers);
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("visibilityUpperBound", visibilityUpperBound)
.add("expiration", expiration)
.add("type", type)
.add("checkpointWritePointers", checkpointWritePointers)
.add("clientId", clientId)
.toString();
}
}
/**
* Represents a set of changes from a client.
*/
public static class ChangeSet {
final String clientId;
final Set<ChangeId> changeIds;
ChangeSet(@Nullable String clientId, Set<ChangeId> changeIds) {
this.clientId = clientId;
this.changeIds = changeIds;
}
@Nullable
public String getClientId() {
return clientId;
}
public Set<ChangeId> getChangeIds() {
return changeIds;
}
}
}