// com.bigdata.service.AbstractTransactionService (Maven / Gradle / Ivy)
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Mar 15, 2007
*/
package com.bigdata.service;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.config.LongValidator;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.ICounterSetAccess;
import com.bigdata.counters.Instrument;
import com.bigdata.ha.HAStatusEnum;
import com.bigdata.journal.ITransactionService;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Journal;
import com.bigdata.journal.RunState;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.journal.ValidationError;
import com.bigdata.resources.ResourceManager;
import com.bigdata.util.InnerCause;
import com.bigdata.util.MillisecondTimestampFactory;
/**
* Centralized transaction manager service. In response to a client request, the
* transaction manager will distribute prepare/commit or abort operations to all
* data services on which writes were made by a transaction. The transaction
* manager also provides global timestamps required for non-transactional commit
* points and various other purposes.
*
* @author Bryan Thompson
* @version $Id$
*
* @todo failover. the service instances will need to track active/committed
* transactions, complain if their clocks get out of alignment, and refuse
* to generate a timestamp that would go backwards when compared to the
* timestamp generated by the last master service.
*/
abstract public class AbstractTransactionService extends AbstractService
implements ITransactionService, IServiceShutdown, ICounterSetAccess {
/**
* Logger.
*/
protected static final Logger log = Logger.getLogger(AbstractTransactionService.class);
// protected static final boolean INFO = log.isInfoEnabled();
// protected static final boolean DEBUG = log.isDebugEnabled();
/**
* Options understood by this service.
*
* @author Bryan Thompson
* @version $Id$
*/
public interface Options {
/**
* How long you want to hold onto the database history (in milliseconds)
* or {@link Long#MAX_VALUE} for an (effectively) immortal database. The
* {@link ITransactionService} tracks the timestamp corresponding to the
* earliest running transaction (if any). When such a transaction
* exists, the actual release time is:
*
* <pre>
* releaseTime = min(lastCommitTime - 1, min(earliestRunningTx, now - minimumReleaseAge))
* </pre>
*
* This ensures that history in use by running transactions is not
* released even when the minimumReleaseAge is ZERO (0).
*
* When no transactions exist the actual release time is:
*
* <pre>
* releaseTime = min(commitTime - 1, now - minimumReleaseAge)
* </pre>
*
* This ensures that the release time advances when no transactions
* are in use, but that the minimum release age is still respected.
*
* The configured value is parsed by the service constructor using
* {@link LongValidator#GTE_ZERO}.
*
* @see #DEFAULT_MIN_RELEASE_AGE
* @see #MIN_RELEASE_AGE_1H
* @see #MIN_RELEASE_AGE_1D
* @see #MIN_RELEASE_AGE_1W
* @see #MIN_RELEASE_AGE_NEVER
*
* @see AbstractTransactionService#updateReleaseTime(long, TxState)
* @see AbstractTransactionService#notifyCommit(long)
*/
String MIN_RELEASE_AGE = AbstractTransactionService.class.getName()
+ ".minReleaseAge";
/**
* Minimum release age is zero (0). A value of ZERO (0) implies that any
* history not required for the read-committed view is released each
* time the {@link ResourceManager} overflows.
*/
String MIN_RELEASE_AGE_NO_HISTORY = "0";
/** Minimum release age is one minute (expressed in milliseconds). */
String MIN_RELEASE_AGE_1M = "" + 1/* mn */* 60/* sec */* 1000/* ms */;
/** Minimum release age is five minutes (expressed in milliseconds). */
String MIN_RELEASE_AGE_5M = "" + 5/* mn */* 60/* sec */* 1000/* ms */;
/** Minimum release age is one hour (expressed in milliseconds). */
String MIN_RELEASE_AGE_1H = "" + 1/* hr */* 60/* mn */* 60/* sec */
* 1000/* ms */;
/** Minimum release age is one day (expressed in milliseconds). */
String MIN_RELEASE_AGE_1D = "" + 24/* hr */* 60/* mn */* 60/* sec */
* 1000/* ms */;
/** Minimum release age is one week (expressed in milliseconds). */
String MIN_RELEASE_AGE_1W = "" + 7/* d */* 24/* hr */* 60/* mn */
* 60/* sec */
* 1000/* ms */;
/**
* Immortal database (the minimum release age is {@link Long#MAX_VALUE},
* in which case the service does not advance the release time).
*/
String MIN_RELEASE_AGE_NEVER = "" + Long.MAX_VALUE;
/**
* Default minimum release age is ONE (1L) millisecond (only the last
* commit point will be retained after a full compacting merge). This
* causes the RWStore to use its recycler mode by default rather than
* its session protection mode.
*
* Note: see the project ticket titled "Change DEFAULT_MIN_RELEASE_AGE
* to 1ms" for the rationale.
*/
String DEFAULT_MIN_RELEASE_AGE = "1";
// String DEFAULT_MIN_RELEASE_AGE = MIN_RELEASE_AGE_NO_HISTORY;
}
/**
* If the transaction is read-only and a write operation was requested.
*/
protected static final transient String ERR_READ_ONLY = "Read-only";
/**
* If the transaction is not known to this service.
*/
protected static final transient String ERR_NO_SUCH = "Unknown transaction";
/**
* If a transaction is no longer active.
*/
protected static final transient String ERR_NOT_ACTIVE = "Not active";
/**
* If the transaction service is not in a run state which permits the
* requested operation.
*/
protected static final transient String ERR_SERVICE_NOT_AVAIL = "Service not available";
/**
* The run state for the transaction service.
*/
private volatile TxServiceRunState runState;
/**
* A copy of the callers properties.
*/
private final Properties properties;
/**
* The minimum age in milliseconds before history may be released.
*
* @see Options#MIN_RELEASE_AGE
*/
final private long minReleaseAge;
/**
 * Returns a new {@link Properties} object whose defaults are the properties
 * that were used to initialize the service.
 *
 * @return A fresh {@link Properties} instance wrapping this service's own
 *         copy of the configuration.
 */
protected Properties getProperties() {
    final Properties wrapped = new Properties(properties);
    return wrapped;
}
/**
 * A hash map containing all active transactions. A transaction that is
 * preparing will remain in this collection until it has completed (aborted
 * or committed). The key is the txId of the transaction (the signed value)
 * and the value is the {@link TxState} for that transaction.
 *
 * Note: The map is declared with explicit type arguments. With a raw
 * {@link ConcurrentHashMap} the lookups in this class (which assign the
 * result of <code>get()</code> to a {@link TxState} and iterate the key set
 * as <code>long</code>) do not compile.
 *
 * @todo config param for the initial capacity of the map.
 * @todo config for the concurrency rating of the map.
 */
final private ConcurrentHashMap<Long, TxState> activeTx = new ConcurrentHashMap<Long, TxState>();
/**
 * Return the {@link TxState} associated with the specified transaction
 * identifier.
 *
 * Note: This method is an internal API. The caller must adhere to the
 * internal synchronization APIs for the transaction service.
 *
 * @param tx
 *            The transaction identifier (the signed value, NOT the absolute
 *            value).
 *
 * @return The {@link TxState} -or- <code>null</code> if there is no such
 *         active transaction.
 */
protected TxState getTxState(final long tx) {
    final TxState state = activeTx.get(Long.valueOf(tx));
    return state;
}
/**
 * The #of open transactions in any {@link RunState}, i.e. the current size
 * of the active transaction table.
 *
 * @return The number of entries in the active transaction table.
 */
final public int getActiveCount() {
    final int openTxCount = activeTx.size();
    return openTxCount;
}
public AbstractTransactionService(final Properties properties) {
this.properties = (Properties) properties.clone();
{
this.minReleaseAge = LongValidator.GTE_ZERO.parse(
Options.MIN_RELEASE_AGE, properties.getProperty(
Options.MIN_RELEASE_AGE,
Options.DEFAULT_MIN_RELEASE_AGE));
if (log.isInfoEnabled())
log.info(Options.MIN_RELEASE_AGE + "=" + minReleaseAge);
}
runState = TxServiceRunState.Starting;
}
/**
 * Reports whether the service is in any state other than
 * {@link TxServiceRunState#Halted}.
 *
 * @return <code>true</code> unless the run state is Halted.
 */
@Override
public boolean isOpen() {
    final boolean halted = (runState == TxServiceRunState.Halted);
    return !halted;
}
/**
 * Verifies that the service is open.
 *
 * @throws IllegalStateException
 *             if {@link #isOpen()} reports <code>false</code>.
 */
protected void assertOpen() {
    if (isOpen()) {
        return;
    }
    throw new IllegalStateException();
}
/**
 * Return the {@link TxServiceRunState}.
 *
 * @return The current run state of the service.
 *
 * @throws IllegalMonitorStateException
 *             unless the caller is holding the {@link #lock}.
 */
public TxServiceRunState getRunState() {
    if (lock.isHeldByCurrentThread()) {
        return runState;
    }
    throw new IllegalMonitorStateException();
}
/**
 * Change the {@link TxServiceRunState}.
 *
 * @param newval
 *            The new value.
 *
 * @throws IllegalMonitorStateException
 *             unless the caller is holding the {@link #lock}.
 * @throws IllegalStateException
 *             if the requested state is not a legal state change.
 */
synchronized protected void setRunState(final TxServiceRunState newval) {
    final boolean callerOwnsLock = lock.isHeldByCurrentThread();
    if (!callerOwnsLock) {
        throw new IllegalMonitorStateException();
    }

    final boolean legal = runState.isTransitionLegal(newval);
    if (!legal) {
        final String msg = "runState=" + runState + ", but newval=" + newval;
        throw new IllegalStateException(msg);
    }

    this.runState = newval;

    if (log.isInfoEnabled()) {
        log.info("runState=" + runState);
    }
}
/**
 * Polite shutdown. New transactions will not start. This method will block
 * until existing transactions (both read-write and read-only) are complete
 * (either aborted or committed). If the wait is interrupted, the request is
 * converted into {@link #shutdownNow()}.
 */
@Override
public void shutdown() {
    if (log.isInfoEnabled()) {
        log.info("");
    }

    lock.lock();
    try {
        final TxServiceRunState current = getRunState();
        if (current == TxServiceRunState.Shutdown
                || current == TxServiceRunState.ShutdownNow
                || current == TxServiceRunState.Halted) {
            // Shutdown is already in progress or complete.
            return;
        }

        // From here on no new transactions may start.
        setRunState(TxServiceRunState.Shutdown);

        try {
            // Block until the running transactions have completed.
            awaitRunningTx(10/* logTimeout */, TimeUnit.MILLISECONDS);
        } catch (InterruptedException ex) {
            // Interrupted: fall back to the fast shutdown path.
            log.warn("Interrupted during shutdown - will do fast shutdown: "+ex, ex);
            shutdownNow();
            return;
        }

        super.shutdown();

        // The service is now halted.
        setRunState(TxServiceRunState.Halted);
    } finally {
        lock.unlock();
    }
}
/**
 * Wait until active transactions complete.
 *
 * Note: The wait is performed on the {@link #txDeactivate} condition, so the
 * caller must be holding the {@link #lock}.
 *
 * Note: The time of the last log message is only advanced when a message is
 * actually written. Resetting it on every wake-up would suppress the
 * periodic message for as long as the condition is signalled more
 * frequently than the log timeout.
 *
 * @param logTimeout
 *            The timeout between {@link #logTimeout(long, TimeUnit)}
 *            messages.
 * @param unit
 *            The unit for that timeout.
 *
 * @throws InterruptedException
 *             if this method is interrupted.
 */
private void awaitRunningTx(long logTimeout, final TimeUnit unit)
        throws InterruptedException {
    final long begin = System.nanoTime();
    // The time at which the last periodic message was logged.
    long lastLogTime = begin;
    // convert to nanoseconds.
    logTimeout = unit.toNanos(logTimeout);
    if (log.isInfoEnabled())
        log.info("activeCount=" + getActiveCount());
    while (getActiveCount() > 0) {
        // wait for a transaction to complete (or for the timeout).
        final boolean signalled = txDeactivate.await(logTimeout,
                TimeUnit.NANOSECONDS);
        final long now = System.nanoTime();
        // total time since the wait began.
        final long elapsed = now - begin;
        if (signalled && getActiveCount() == 0) {
            // no more tx are active.
            if (log.isInfoEnabled())
                log.info("No transactions remaining: elapsed=" + elapsed);
            return;
        }
        if (now - lastLogTime >= logTimeout) {
            try {
                logTimeout(elapsed, TimeUnit.NANOSECONDS);
            } catch (Throwable t) {
                // Logging is best effort and must not break the shutdown.
                log.error("Ignored", t);
            }
            // Restart the interval only once a message has been written.
            lastLogTime = now;
        }
    }
}
/**
 * Logs periodic messages during shutdown.
 *
 * @param elapsed
 *            The elapsed time since shutdown was requested.
 * @param unit
 *            The unit in which that time is measured. The value is converted
 *            to milliseconds for the message using this unit.
 */
private void logTimeout(final long elapsed, final TimeUnit unit) {
    // Convert using the caller's unit rather than assuming nanoseconds.
    log.warn("Waiting on task(s)" + ": elapsed="
            + unit.toMillis(elapsed) + "ms, #active="
            + getActiveCount() + ", #readWrite="
            + getReadWriteActiveCount() + ", #readOnly="
            + getReadOnlyActiveCount());
}
/**
 * Fast shutdown (not immediate since it must abort active transactions).
 *
 * New transactions will not start and active transactions will be aborted.
 * Transactions which are concurrently committing MAY fail (throwing
 * exceptions from various methods, including {@link #nextTimestamp()})
 * when the service halts.
 */
@Override
public void shutdownNow() {
    if (log.isInfoEnabled()) {
        log.info("");
    }

    lock.lock();
    try {
        final TxServiceRunState current = getRunState();
        if (current == TxServiceRunState.ShutdownNow
                || current == TxServiceRunState.Halted) {
            // Fast shutdown is already in progress or complete.
            return;
        }

        setRunState(TxServiceRunState.ShutdownNow);

        // Every transaction that is still active gets aborted.
        abortAllTx();

        super.shutdownNow();

        setRunState(TxServiceRunState.Halted);
    } finally {
        lock.unlock();
    }
}
/**
* Abort all active transactions.
*
* The outer {@link #lock} is held for the duration of the method. For each
* entry in the active transaction table the per-transaction lock is taken,
* the transaction is aborted if it is still active and is then removed from
* the table via {@link #deactivateTx(TxState)}. The release time is updated
* for every transaction that was visited and {@link #txDeactivate} is
* signalled once after the loop. A failure while aborting one transaction is
* logged and does not prevent the remaining transactions from being
* processed.
*/
public void abortAllTx() {
lock.lock();
try {
for (long tx : activeTx.keySet()) {
final TxState state = activeTx.get(tx);
if (state == null) {
/*
* Note: concurrent removal or clearing of the weak
* reference is possible.
*/
continue;
}
// Lock order: outer lock first, then the per-transaction lock.
state.lock.lock();
try {
if (state.isActive()) {
// if (!state.isReadOnly()) {
try {
abortImpl(state);
assert state.isAborted() : state.toString();
} catch (Throwable t) {
// Log and continue with the remaining transactions.
log.error(state.toString(), t);
} finally {
// Always remove the tx from the local tables.
deactivateTx(state);
}
}
} finally {
state.lock.unlock();
/*
* Note: We are already holding the outer lock so we do not
* need to acquire it here.
*/
updateReleaseTime(Math.abs(state.tx), null/* deactivatedTx */);
}
} // foreach tx in activeTx
// signal once now that we are done.
txDeactivate.signalAll();
final int activeCount = getActiveCount();
if (activeCount != 0) {
// Something is still registered after the abort pass.
log.warn("Service shutdown with active transactions: #nactive="
+ activeTx.size());
}
} finally {
lock.unlock();
}
}
/**
 * Immediate/fast shutdown of the service and then destroys any persistent
 * state associated with the service.
 *
 * Note: This abstract implementation has no persistent state, so it only
 * performs {@link #shutdownNow()} while holding the {@link #lock}.
 */
@Override
synchronized public void destroy() {
    log.warn("");

    lock.lock();
    try {
        // Note: no persistent state in this abstract impl.
        shutdownNow();
    } finally {
        lock.unlock();
    }
}
/**
 * {@inheritDoc}
 *
 * Note: This method is allowed in all run states (after startup) since so
 * much depends on the ability to obtain timestamps, including the
 * unisolated operations on individual journals or data services.
 *
 * @throws IllegalStateException
 *             if the service is still in the
 *             {@link TxServiceRunState#Starting} state.
 */
@Override
public long nextTimestamp() {
    // Only the Starting state is rejected; every other state is permitted.
    if (runState == TxServiceRunState.Starting) {
        throw new IllegalStateException(ERR_SERVICE_NOT_AVAIL);
    }

    return _nextTimestamp();
}
/**
 * Obtains the next timestamp from the {@link MillisecondTimestampFactory}
 * and records it as the last timestamp issued. Private version is also used
 * by {@link #start()}.
 *
 * TODO Why is this synchronized(this)? The timestamp factory is
 * synchronized internally and {@link #lastTimestamp} is volatile.
 *
 * @return The newly issued timestamp.
 */
synchronized private final long _nextTimestamp() {
    final long issued = MillisecondTimestampFactory.nextMillis();
    lastTimestamp = issued;
    return issued;
}
/** The last timestamp issued. */
private volatile long lastTimestamp;
/**
 * {@inheritDoc}
 *
 * Note: There is an upper bound of one read-write transaction that may be
 * created per millisecond (the resolution of {@link #nextTimestamp()}) and
 * requests for new read-write transactions contend with other requests for
 * {@link #nextTimestamp()}.
 *
 * Note: The transaction service will refuse to start new transactions whose
 * timestamps are LTE to {@link #getReleaseTime()}.
 *
 * @throws IllegalStateException
 *             if the service is not in the
 *             {@link TxServiceRunState#Running} state.
 * @throws RuntimeException
 *             Wrapping {@link TimeoutException} if a timeout occurs
 *             awaiting a start time which would satisfy the request for a
 *             read-only transaction (this can occur only for read-only
 *             transactions which must contend for start times which will
 *             read from the appropriate historical commit point).
 * @throws RuntimeException
 *             Wrapping {@link InterruptedException} if the caller is
 *             interrupted. The interrupt status of the thread is restored
 *             before the exception is thrown.
 */
@Override
public long newTx(final long timestamp) {
    setupLoggingContext();
    try {
        /*
         * Note: It may be possible to increase the concurrency of this
         * operation. Many cases do not allow contention since they will
         * just use the value returned by nextTimestamp(), which is always
         * distinct. Those cases which do allow contention involve search
         * for a start time that can read from a specific commit point. Even
         * then we may be able to reduce contention using atomic operations
         * on [activeTx], e.g., putIfAbsent().
         *
         * However, pay attention to [lock]. Certainly it is serializing
         * newTx() at this point as well several other methods on this API.
         * Higher concurrency will require relaxing constraints on atomic
         * state transitions governed by [lock]. Perhaps by introducing
         * additional locks that are more specific. I don't want to relax
         * those constraints until I have a better sense of what must be
         * exclusive operations.
         */
        lock.lock();
        try {
            /*
             * Note: The run state check MUST be inside of the try block.
             * Otherwise the lock is never released when the service is not
             * running and the exception is thrown.
             */
            switch (getRunState()) {
            case Running:
                break;
            default:
                throw new IllegalStateException(ERR_SERVICE_NOT_AVAIL);
            }
            final TxState txState = assignTransactionIdentifier(timestamp);
            activateTx(txState);
            return txState.tx;
        } catch (TimeoutException ex) {
            throw new RuntimeException(ex);
        } catch (InterruptedException ex) {
            // Preserve the interrupt for the caller.
            Thread.currentThread().interrupt();
            throw new RuntimeException(ex);
        } finally {
            lock.unlock();
        }
    } finally {
        clearLoggingContext();
    }
}
/**
* A lock used to serialize certain operations that must be atomic with
* respect to the state of the transaction service. Mostly this is used to
* serialize the assignment of transaction identifiers and the update of the
* release time as transactions complete.
*
* Note: To avoid lock ordering problems DO NOT acquire this {@link #lock}
* if you are already holding a {@link TxState#lock}. This causes a lock
* ordering problem and can result in deadlock.
*/
protected final ReentrantLock lock = new ReentrantLock();
/**
* Signaled by {@link #deactivateTx(TxState)} and based on {@link #lock}.
*/
protected final Condition txDeactivate = lock.newCondition();
/** #of transactions started. */
private long startCount = 0L;
/** #of transactions aborted. */
private long abortCount = 0L;
/** #of transactions committed (does not count bare commits). */
private long commitCount = 0L;
/** #of active read-write transactions. */
private final AtomicLong readWriteActiveCount = new AtomicLong(0L);
/** #of active read-only transactions. */
private final AtomicLong readOnlyActiveCount = new AtomicLong(0L);
/**
* #of transactions started.
*
* Note: This reads a plain <code>long</code> field without synchronization.
*/
public long getStartCount() {
return startCount;
}
/**
* #of transactions aborted.
*
* Note: This reads a plain <code>long</code> field without synchronization.
*/
public long getAbortCount() {
return abortCount;
}
/**
* #of transactions committed (does not count bare commits).
*
* Note: This reads a plain <code>long</code> field without synchronization.
*/
public long getCommitCount() {
return commitCount;
}
/** #of active read-only transactions. */
public long getReadOnlyActiveCount() {
return readOnlyActiveCount.get();
}
/** #of active read-write transactions. */
public long getReadWriteActiveCount() {
return readWriteActiveCount.get();
}
// /**
// * The minimum over the absolute values of the active transactions.
// *
// * Note: This is a transaction identifier. It is NOT the commitTime on which
// * that transaction is reading.
// *
// * @see https://sourceforge.net/apps/trac/bigdata/ticket/467
// */
// public long getEarliestTxStartTime() {
//
// return earliestTxStartTime;
//
// }
/**
 * Return the {@link TxState} for the earliest active Tx -or-
 * <code>null</code> if there is no active tx.
 *
 * Note: The {@link #lock} is required in order to make atomic decisions
 * about the earliest active tx. Without the {@link #lock}, the tx could
 * stop or a new tx could start, thereby invalidating the "earliest active"
 * guarantee.
 *
 * @return The earliest open transaction -or- <code>null</code>.
 *
 * @throws IllegalMonitorStateException
 *             unless the {@link #lock} is held by the caller.
 */
protected TxState getEarliestActiveTx() {
    if (lock.isHeldByCurrentThread()) {
        return earliestOpenTx;
    }
    throw new IllegalMonitorStateException();
}
/**
* The earliest open transaction.
*
* Note: This field is guarded by the {@link #lock}. However, it is declared
* <code>volatile</code> to provide visibility to {@link #getCounters()}
* without taking the lock.
*
* See the project ticket titled "IllegalStateException trying to access
* lexicon index using RWStore with recycling".
*/
private volatile TxState earliestOpenTx = null;
/**
 * {@inheritDoc}
 *
 * @return The current value of the release time field.
 *
 * @see Options#MIN_RELEASE_AGE
 */
@Override
public long getReleaseTime() {
    if (log.isTraceEnabled()) {
        final String msg = "releaseTime=" + releaseTime
                + ", lastKnownCommitTime=" + getLastCommitTime();
        log.trace(msg);
    }

    return releaseTime;
}
private volatile long releaseTime = 0L;
// /** Note: This code is incorrect.
// * Provides correct value for RWStore deferred store releases to be
// * recycled. The effective release time does not need a lock since we are
// * called from within the AbstractJournal commit. The calculation can safely
// * be based on the system time, the min release age and the earliest active
// * transaction. The purpose is to permit the RWStore to recycle data based
// * on the release time which will be in effect at the commit point.
// *
// * @return earliest time that data can be released
// */
// public long getEarliestReleaseTime() {
// final long immediate = System.currentTimeMillis() - minReleaseAge;
//
// return earliestTxStartTime == 0 || immediate < earliestTxStartTime
// ? immediate : earliestTxStartTime;
// }
/**
 * Sets the new release time.
 *
 * Note: For a joined service in HA (the leader or a follower), the release
 * time is set by the consensus protocol. Otherwise it is automatically
 * maintained by {@link #updateReleaseTime(long, TxState)} and
 * {@link #updateReleaseTimeForBareCommit(long)}.
 *
 * Note: A request that would move the release time backwards is logged as
 * an error and otherwise ignored.
 *
 * See the project ticket titled "Query on follower fails during UPDATE on
 * leader".
 *
 * @param newValue
 *            The new value.
 *
 * @throws IllegalMonitorStateException
 *             unless the caller is holding the {@link #lock}.
 */
protected void setReleaseTime(final long newValue) {
    if (!lock.isHeldByCurrentThread()) {
        throw new IllegalMonitorStateException();
    }

    final long oldValue = releaseTime;

    if (newValue >= oldValue) {
        if (log.isInfoEnabled()) {
            log.info("newValue=" + newValue);
        }
        this.releaseTime = newValue;
        return;
    }

    // The release time may not go backwards: report it and keep the old value.
    final String msg = "oldValue=" + oldValue + ", newValue="
            + newValue;
    log.error(msg, new RuntimeException(msg));
}
/**
 * This method was introduced to compute the effective timestamp of the
 * pinned history in support of the HA TXS. It ignores the
 * <code>releaseTime</code> and reports the minimum of
 * <code>lastCommitTime - 1</code>, <code>now - minReleaseAge</code> and
 * the timestamp before the readsOnCommitTime of the earliest active Tx.
 * When the minimum release age is {@link Long#MAX_VALUE}, ZERO (0L) is
 * reported since all history is pinned.
 *
 * Note: This duplicates logic in
 * {@link #updateReleaseTime(long, TxState)}, but handles the special case
 * in HA where the releaseTime is not being updated by
 * {@link #updateReleaseTimeForBareCommit(long)}.
 *
 * @return The effective release time.
 *
 * @see #updateReleaseTime(long, TxState)
 */
protected long getEffectiveReleaseTimeForHA() {
    if (minReleaseAge == Long.MAX_VALUE) {
        // All history is pinned.
        return 0L;
    }

    // Note: read before the lock is acquired.
    final long lastCommitTime = getLastCommitTime();

    lock.lock();
    try {
        final long now = _nextTimestamp();

        // The earliest active tx (if any) pins the commit point it reads on.
        final TxState txState = getEarliestActiveTx();
        final long earliestTxReadsOnCommitTime = (txState == null)
                ? now // No active tx. Use now.
                : txState.readsOnCommitTime;

        /*
         * The release time will be the minimum of:
         *
         * a) The timestamp BEFORE the lastCommitTime.
         *
         * b) The timestamp BEFORE the commit time on which the earliest
         * active tx is reading.
         *
         * c) minReleaseAge milliseconds in the past.
         *
         * Note: NEVER let go of the last commit time!
         *
         * @todo there is a fence post here for [now-minReleaseAge] when
         * minReleaseAge is very large, e.g., Long#MAX_VALUE. This is caught
         * above for that specific value, but other very large values could
         * also cause problems.
         *
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/467
         */
        final long beforeLastCommit = lastCommitTime - 1;
        final long beforeEarliestTx = earliestTxReadsOnCommitTime - 1;
        final long agedOut = now - minReleaseAge;
        final long effectiveReleaseTimeForHA = Math.min(beforeLastCommit,
                Math.min(beforeEarliestTx, agedOut));

        if (log.isDebugEnabled()) {
            log.debug("releaseTime=" + releaseTime //
                    + ", lastCommitTime=" + lastCommitTime
                    + ", earliestActiveTx=" + txState//
                    + ", readsOnCommitTime=" + earliestTxReadsOnCommitTime//
                    + ", (now-minReleaseAge)=" + (now - minReleaseAge)//
                    + ": effectiveReleaseTimeForHA=" + effectiveReleaseTimeForHA//
            );
        }

        return effectiveReleaseTimeForHA;
    } finally {
        lock.unlock();
    }
}
/**
* Adds the transaction to the local tables: the active transaction map and
* the ordered start time index. Also updates the earliest open transaction
* (when this transaction is earlier than the current one) and the start and
* active counters.
*
* Note: This method acquires the {@link TxState#lock} for the duration of
* the update. It does not itself verify that the outer {@link #lock} is
* held.
*
* @param state
* The transaction.
*
* @throws IllegalArgumentException
* if the argument is <code>null</code>.
* @throws IllegalArgumentException
* if the transaction is not active.
*/
protected void activateTx(final TxState state) {
if (state == null)
throw new IllegalArgumentException();
state.lock.lock();
try {
if (!state.isActive())
throw new IllegalArgumentException();
if (this.earliestOpenTx == null
|| Math.abs(state.tx) < Math.abs(this.earliestOpenTx.tx)) {
/*
* This is the earliest open transaction. This is defined as the
* transaction whose readsOnCommitTime is LTE all other
* transactions and whose absolute txId value is LT all other
* transactions. Since we assign the txIds in intervals GTE the
* readsOnCommitTime and LT the next possible commit point, we
* can maintain this invariant by only comparing abs(txId).
*/
this.earliestOpenTx = state;
}
// Register under the signed transaction identifier.
activeTx.put(state.tx, state);
synchronized(startTimeIndex) {
/*
* Note: Using the absolute value of the assigned timestamp so
* that the index is ordered earliest to most recent. This means
* that the absolute value of the timestamps must be unique,
* otherwise this will throw out an exception.
*/
// startTimeIndex.add(Math.abs(state.tx), state.readsOnCommitTime);
startTimeIndex.add(state);
}
startCount++;
// Track the #of active tx by kind (read-only vs read-write).
if(state.isReadOnly()) {
readOnlyActiveCount.incrementAndGet();
} else {
readWriteActiveCount.incrementAndGet();
}
if (log.isInfoEnabled())
log.info(state.toString() + ", releaseTime="+releaseTime+", earliestActiveTx="+earliestOpenTx+", startCount=" + startCount
+ ", abortCount=" + abortCount + ", commitCount="
+ commitCount + ", readOnlyActiveCount="
+ readOnlyActiveCount + ", readWriteActiveCount="
+ readWriteActiveCount);
} finally {
state.lock.unlock();
}
}
/**
 * Return the commit time on which the transaction is reading.
 *
 * Note: This method is exposed primarily for the unit tests.
 *
 * @param txId
 *            The transaction identifier.
 * @return The commit time on which that transaction is reading.
 * @throws IllegalArgumentException
 *             if there is no such transaction in the active transaction
 *             table.
 */
protected long getReadsOnTime(final long txId) {
    final TxState state = activeTx.get(txId);

    if (state != null) {
        return state.readsOnCommitTime;
    }

    throw new IllegalArgumentException();
}
/**
 * Removes the transaction from the local tables.
 *
 * Note: The caller MUST own {@link TxState#lock} across this method and
 * MUST then do
 *
 * <pre>
 * updateReleaseTime(long, TxState)
 * txDeactivate.signalAll()
 * </pre>
 *
 * while holding the outer {@link #lock}.
 *
 * Note: Normally this method is invoked without the outer {@link #lock}
 * which necessitates lifting those method calls out of this method and into
 * the caller.
 *
 * @param state
 *            The transaction.
 *
 * @throws IllegalArgumentException
 *             if the argument is <code>null</code> or the transaction is
 *             not complete.
 * @throws IllegalMonitorStateException
 *             unless the caller is holding the {@link TxState#lock}.
 */
protected void deactivateTx(final TxState state) {
    if (state == null) {
        throw new IllegalArgumentException();
    }
    if (!state.lock.isHeldByCurrentThread()) {
        throw new IllegalMonitorStateException();
    }
    if (!state.isComplete()) {
        throw new IllegalArgumentException();
    }

    // Outcome counters: a completed tx either aborted or committed.
    final boolean aborted = state.isAborted();
    if (aborted) {
        abortCount++;
    } else {
        commitCount++;
    }

    // The tx no longer counts as active for its kind.
    final AtomicLong activeCounter = state.isReadOnly()
            ? readOnlyActiveCount
            : readWriteActiveCount;
    activeCounter.decrementAndGet();

    // Drop the tx from the active transaction table.
    final boolean wasRegistered = activeTx.remove(state.tx) != null;
    if (!wasRegistered) {
        log.warn("Transaction not in table: " + state);
    }

    if (log.isInfoEnabled()) {
        log.info(state.toString() + ", startCount=" + startCount
                + ", abortCount=" + abortCount + ", commitCount="
                + commitCount + ", readOnlyActiveCount="
                + readOnlyActiveCount + ", readWriteActiveCount="
                + readWriteActiveCount);
    }
}
/**
* Return <code>true</code> iff the release time consensus protocol is being
* used to update the releaseTime (HA and this service is either a leader or
* a follower). Return <code>false</code> iff the service should locally
* manage its own release time (non-HA and HA when the service is
* {@link HAStatusEnum#NotReady}).
*
* Note: When we are using a 2-phase commit, the leader can not update the
* release time from commit() using this method. It must rely on the
* consensus protocol to update the release time instead.
*
* Note: This implementation always returns <code>false</code>.
*
* @return <code>false</code> in this implementation.
*/
protected boolean isReleaseTimeConsensusProtocol() {
return false;
}
/**
* This method MUST be invoked each time a transaction completes with the
* absolute value of the transaction identifier that has just been
* deactivated. The method will remove the transaction entry in the ordered
* set of running transactions ({@link #startTimeIndex}).
*
* If the specified timestamp corresponds to the earliest running
* transaction, then the releaseTime
will be updated and the
* new releaseTime will be set using {@link #setReleaseTime(long)}. For HA,
* the releaseTime is updated by a consensus protocol and the individual
* services MUST NOT advance their releaseTime as transactions complete.
*
* Note: When we are using a 2-phase commit, the leader can not update the
* release time from commit() using this methods. It must rely on the
* consensus protocol to update the release time instead.
*
* @param timestamp
* The absolute value of a transaction identifier that has just
* been deactivated.
* @param deactivatedTx
* The transaction object that has been deactivated -or-
* null
if there are known to be no active
* transactions remaining (e.g., startup and abortAll()).
*
* @see
* IllegalStateException trying to access lexicon index using RWStore
* with recycling
*
* @see
* Query on follower fails during UPDATE on leader
*
* @todo the {@link #startTimeIndex} could be used by
* {@link #findUnusedTimestamp(long, long)} so that it could further
* constrain its search within the half-open interval.
*/
final protected void updateReleaseTime(final long timestamp,
final TxState deactivatedTx) {
if (timestamp <= 0)
throw new IllegalArgumentException();
/*
* Note: The calculation of the new release time needs to be atomic.
*
* Note: This uses the same lock that we use to create new transactions
* in order to prevent a new transaction from starting while we are
* updating the release time.
*
* @todo Should this also be used to serialize both handing out commit
* times (for 2-phase commits) and acknowledging commit times (for
* single phase commits)?
*/
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
// current timestamp.
final long now = _nextTimestamp();
// current value for the releaseTime.
final long oldReleaseTime = this.releaseTime;
/*
* true iff the tx specified by the caller was the earliest running
* transaction.
*/
final boolean isEarliestTx;
// /*
// * The earliest tx remaining now that the caller's tx is complete and
// * [now] if there are no more running transactions.
// */
// final long earliestTxStartTimeX;
// /*
// * The commit time on which the earliest remaining tx is reading and
// * [now] if there are no more running transactions.
// */
// final long earliestTxReadsOnCommitTimeX;
TxState earliestActiveTx = null;
synchronized (startTimeIndex) {
// Note: ZERO (0) is the first tuple in the B+Tree.
// Note: MINUS ONE (-1) means that the B+Tree is empty.
final long indexOf = startTimeIndex.findIndexOf(timestamp);
isEarliestTx = indexOf == 0;
// remove start time from the index.
if (indexOf != -1)
startTimeIndex.remove(timestamp);
// if (!isEarliestTx) {
//
// // No change unless earliest tx terminates.
// return;
//
// }
if (startTimeIndex.getEntryCount() > 0) {
/* There are remaining entries in the [startTimeIndex]. Scan it for the earliestActiveTx remaining.
*
* Note: We need to handle a data race where the earliest active
* tx in the [startTimeIndex] has been concurrently deactivated
* (and removed from the [activeTx] map). This is done by
* scanning until we find the first active tx in the
* [startTimeIndex]. It will typically be the first entry.
*
* Note: transactions can not start or end while we are
* synchronized the [startTimeIndex].
*/
@SuppressWarnings("rawtypes")
final ITupleIterator titr = startTimeIndex.rangeIterator();
while (titr.hasNext()) {
@SuppressWarnings("rawtypes")
final ITuple t = titr.next();
final ITxState0 x = (ITxState0) t.getObject();
// Lookup the [activeTx] map.
final TxState tmp = getTxState(x.getStartTimestamp());
if (tmp == null) {
/*
* Transaction is no longer active (and no longer in the
* activeTx map).
*/
continue;
}
if (!tmp.isActive()) {
// Transaction is no longer active.
continue;
}
// Must not be the tx that we just deactivated.
assert tmp != deactivatedTx;
earliestActiveTx = tmp;
break;
}
// /*
// * The start time associated with the earliest remaining tx.
// */
// final byte[] key = startTimeIndex.keyAt(0L);
//
// earliestTxStartTime = startTimeIndex.decodeKey(key);
//
// /*
// * The commit point on which that tx is reading.
// *
// * @see https://sourceforge.net/apps/trac/bigdata/ticket/467
// */
//
// final byte[] val = startTimeIndex.valueAt(0L);
//
// earliestTxReadsOnCommitTime = startTimeIndex.decodeVal(val);
//
// // The earliest open transaction identifier.
// this.earliestOpenTxId = earliestTxStartTime;
//
// if (log.isTraceEnabled())
// log.trace("earliestOpenTxId=" + earliestTxStartTime);
} else {
/*
* There are no commit points and there are no active
* transactions.
*/
// earliestTxStartTime = earliestTxReadsOnCommitTime = now;
// There are no open transactions.
earliestActiveTx = null;
// if (log.isTraceEnabled())
// log.trace("earliestOpenTxId=[noActiveTx]");
}
// Update the field [volatile write].
this.earliestOpenTx = earliestActiveTx;
if (log.isTraceEnabled())
log.trace("earliestActiveTx=" + earliestActiveTx);
} // synchronized(startTimeIndex)
if (minReleaseAge == Long.MAX_VALUE) {
return;
}
if (isEarliestTx && !isReleaseTimeConsensusProtocol()) {
/*
* The transaction that just finished was the earliest activeTx.
*/
final long earliestTxStartTime = earliestActiveTx == null ? now
: earliestActiveTx.tx;
final long earliestTxReadsOnCommitTime = earliestActiveTx == null ? now
: earliestActiveTx.readsOnCommitTime;
// last commit time on the database.
final long lastCommitTime = getLastCommitTime();
// minimum milliseconds to retain history.
final long minReleaseAge = getMinReleaseAge();
/*
* The release time will be the minimum of:
*
* a) The timestamp BEFORE the lastCommitTime.
*
* b) The timestamp BEFORE the earliestTxStartTime.
*
* c) minReleaseAge milliseconds in the past.
*
* Note: NEVER let go of the last commit time!
*
* @todo there is a fence post here for [now-minReleaseAge] when
* minReleaseAge is very large, e.g., Long#MAX_VALUE. This is caught
* above for that specific value, but other very large values could
* also cause problems.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/467
*/
final long releaseTime = Math.min(
lastCommitTime - 1,
Math.min(earliestTxReadsOnCommitTime - 1, now
- minReleaseAge));
// earliestTxStartTime - 1, now - minReleaseAge));
/*
* We only update the release time if the computed time would
* advance the releaseTime.
*
* Note: The releaseTime MUST NOT go backwards since the database
* may have already released history for any commit point whose
* commitTime is LTE to the existing releaseTime.
*/
if (this.releaseTime < releaseTime) {
if (log.isInfoEnabled())
log.info("lastCommitTime=" + lastCommitTime
+ ", earliestTxStartTime=" + earliestTxStartTime
+ ", minReleaseAge=" + minReleaseAge + ", now="
+ now + ", releaseTime(" + oldReleaseTime + "->"
+ releaseTime + ")");
// update.
setReleaseTime(releaseTime);
}
}
}
/**
* The basic implementation advances the release time periodically as
* commits occur even when there are no transactions in use. The work is
* delegated to {@link #updateReleaseTimeForBareCommit(long)}, which is
* invoked while holding the service {@link #lock}.
* <p>
* Note: This needs to be a fairly low-latency operation since this method
* is invoked for all commits on all data services and will otherwise be a
* global hotspot.
*
* @param commitTime
* The commit time being reported. It is passed through unchanged
* to {@link #updateReleaseTimeForBareCommit(long)}.
*/
@Override
public void notifyCommit(final long commitTime) {
// The same lock is taken by abort()/commit() before they update the
// release time.
lock.lock();
try {
updateReleaseTimeForBareCommit(commitTime);
} finally {
// Always release the lock, even if the update throws.
lock.unlock();
}
}
/**
 * Advance the release time after a commit that occurred while no
 * transactions were active.
 * <p>
 * The release time is only considered for update when ALL of the following
 * hold:
 * <ul>
 * <li>{@link #isReleaseTimeConsensusProtocol()} reports <code>false</code>;</li>
 * <li>the current release time is LT <code>(commitTime - 1)</code>; and</li>
 * <li>the <code>startTimeIndex</code> is empty.</li>
 * </ul>
 * The candidate release time is the minimum of <code>(commitTime - 1)</code>
 * and <code>(now - minReleaseAge)</code>. It is applied only when it would
 * move the release time forward.
 * <p>
 * Note: This method was historically part of {@link #notifyCommit(long)}.
 * It was moved into its own method so it can be overridden for some unit
 * tests.
 * <p>
 * Note: When we are using a 2-phase commit, the leader can not update the
 * release time from commit() using this method. It must rely on the
 * consensus protocol to update the release time instead.
 *
 * @param commitTime
 *            The commit time of the commit point that was just made.
 */
protected void updateReleaseTimeForBareCommit(final long commitTime) {

    // Acquired here as well so that direct callers are also serialized.
    lock.lock();

    try {

        synchronized (startTimeIndex) {

            if (isReleaseTimeConsensusProtocol()) {
                // The consensus protocol owns the release time.
                return;
            }

            if (this.releaseTime >= (commitTime - 1)) {
                // This commit point can not advance the release time.
                return;
            }

            if (startTimeIndex.getEntryCount() != 0) {
                // There are entries in the start time index.
                return;
            }

            final long now = _nextTimestamp();

            // Never release the commit point itself; honor minReleaseAge.
            final long proposed = Math.min(commitTime - 1, now
                    - minReleaseAge);

            if (this.releaseTime >= proposed) {
                // The release time MUST NOT go backwards.
                return;
            }

            if (log.isInfoEnabled())
                log.info("Advancing releaseTime (no active tx)"
                        + ": lastCommitTime=" + commitTime
                        + ", minReleaseAge=" + minReleaseAge
                        + ", now=" + now + ", releaseTime("
                        + this.releaseTime + "->" + proposed
                        + ")");

            setReleaseTime(proposed);

        }

    } finally {

        lock.unlock();

    }

}
/**
* Return the minimum #of milliseconds of history that must be preserved.
*
* @return The value of the <code>minReleaseAge</code> field.
*
* @todo This centralizes the value for the minimum amount of history that
* will be preserved across the federation.
*
* If the minimum release age is increased, then the release time can
* be changed to match, but only by NOT advancing it until we are
* retaining enough history.
*
* If the minimum release age is decreased, then we can immediately
* release more history (or at least as soon as the task runs to
* notify the discovered data services of the new release time).
*/
final public long getMinReleaseAge() {
return minReleaseAge;
}
/**
* A transient index whose keys are the absolute value of
* the start times of all active transactions. The values are the commit
* times on which the corresponding transaction is reading.
* <p>
* Note: The absolute value constraint is imposed so that we can directly
* identify the earliest active transaction in the index by its position (it
* will be at position zero). This would not work if we let in negative
* start times.
* <p>
* Note: In order to support this,
* {@link #findUnusedTimestamp(long, long, long, TimeUnit)} will not return
* a timestamp whose absolute value corresponds to an active transaction.
* <p>
* Note: Code in this class synchronizes on this object while it reads or
* updates the index.
*/
private final TxId2CommitTimeIndex startTimeIndex = TxId2CommitTimeIndex
.createTransient();
/**
 * Assign a transaction identifier for a new transaction.
 * <p>
 * The request is interpreted as follows:
 * <ul>
 * <li>{@link ITx#UNISOLATED}: the identifier is the next distinct timestamp
 * with its sign flipped (negative). The transaction reads from the last
 * commit point.</li>
 * <li>{@link ITx#READ_COMMITTED} or a timestamp equal to the last commit
 * time: the identifier is the next distinct timestamp. The transaction
 * reads from the last commit point.</li>
 * <li>Any other timestamp: the timestamp must be GT the current release
 * time. The identifier is chosen by {@link #getStartTime(long)}.</li>
 * </ul>
 * Note: A timestamp which lies in the future is NOT rejected by this
 * method.
 *
 * @param timestamp
 *            The timestamp.
 *
 * @return The new transaction object.
 *
 * @throws IllegalStateException
 *             if the timestamp is LTE the release time (the history for
 *             that timestamp may already have been released).
 * @throws InterruptedException
 *             if interrupted while awaiting a start time which would
 *             satisfy the request.
 * @throws TimeoutException
 *             if a timeout occurs while awaiting a start time which would
 *             satisfy the request.
 */
final protected TxState assignTransactionIdentifier(final long timestamp)
        throws InterruptedException, TimeoutException {

    // Sampled once; used as the read point for the symbolic requests.
    final long lastCommitTime = getLastCommitTime();

    if (timestamp == ITx.UNISOLATED) {

        /*
         * Read-write transaction. The negated next timestamp is a valid
         * start time since it is LT the next possible commit point. When we
         * validate, we will read from [-startTime] and the journal will
         * identify the 1st commit point LTE [-startTime], which is the most
         * recent commit point as of the moment the identifier was assigned.
         */
        return new TxState(-nextTimestamp(), lastCommitTime);

    }

    /*
     * Both an explicit request for the last commit time and the symbolic
     * READ_COMMITTED constant read from the most recent commit point. The
     * next timestamp is GT lastCommitTime, so it can be issued directly.
     * This also covers [lastCommitTime == 0].
     */
    final boolean readsLastCommitPoint = timestamp == lastCommitTime
            || timestamp == ITx.READ_COMMITTED;

    if (readsLastCommitPoint) {

        return new TxState(nextTimestamp(), lastCommitTime);

    }

    final long releaseTime = getReleaseTime();

    if (releaseTime >= timestamp) {

        /*
         * The request would read from historical data which has been
         * released. While the data MIGHT still be around, there is no way
         * to assert a read lock for that data since the releaseTime is
         * already in the future.
         */
        throw new IllegalStateException(
                "Timestamp is less than or equal to the release time: timestamp="
                        + timestamp + ", releaseTime=" + releaseTime);

    }

    // Historical read: find a distinct start time for that commit point.
    return getStartTime(timestamp);

}
/**
 * Assign a distinct timestamp to a historical read that will read from the
 * commit point identified by the specified timestamp.
 * <p>
 * Note: Under some circumstances the assignment of a read-only transaction
 * identifier must be delayed until a distinct timestamp becomes available
 * between the desired start time and the next commit point. That wait is
 * bounded by a fixed timeout of 1000 milliseconds.
 *
 * @param timestamp
 *            The timestamp (identifies the desired commit point).
 *
 * @return A new transaction object using a distinct timestamp not in use by
 *         any transaction that will read from the same commit point.
 *
 * @throws InterruptedException
 *             if interrupted while awaiting an unused timestamp.
 * @throws TimeoutException
 *             if no unused timestamp became available before the timeout.
 */
final private TxState getStartTime(final long timestamp)
        throws InterruptedException, TimeoutException {

    // Largest commit time LTE the timestamp (-1L if none).
    final long commitTime = findCommitTime(timestamp);

    if (commitTime == -1L) {

        /*
         * There are no commit points in the log. The next timestamp is
         * guaranteed to be GT the desired commit time (which does not
         * exist) and LT the next commit point. The tx reads on ZERO (0L).
         */
        return new TxState(nextTimestamp(), 0L);

    }

    // The commit time for the successor of that commit point (GT).
    final long nextCommitTime = findNextCommitTime(commitTime);

    /*
     * Without a successor any new timestamp is GT the desired commit time
     * and LT the next commit point. Otherwise we must locate an unused
     * timestamp within [commitTime:nextCommitTime).
     */
    final long txId = (nextCommitTime == -1L) //
            ? nextTimestamp() //
            : findUnusedTimestamp(commitTime, nextCommitTime,
                    1000/* timeout */, TimeUnit.MILLISECONDS);

    // The transaction will read from [commitTime].
    return new TxState(txId, commitTime);

}
/**
* Find the commit time from which the tx will read (the largest commitTime
* LTE <i>timestamp</i>).
*
* @param timestamp
* The timestamp.
*
* @return The commit time -or- <code>-1L</code> if there is no such commit
* time.
*/
protected abstract long findCommitTime(long timestamp);
/**
* Return the commit time for the successor of the commit point having the
* specified commit time (a commit time strictly GT the given value).
*
* @param commitTime
* The probe.
*
* @return The successor -or- <code>-1L</code> iff there is no successor for
* that commit time.
*/
protected abstract long findNextCommitTime(long commitTime);
/**
 * Find a valid, unused timestamp.
 * <p>
 * Note: Any timestamp in the half-open range [commitTime:nextCommitTime)
 * MAY be assigned as all such timestamps will read from the commit point
 * associated with [commitTime].
 * <p>
 * A candidate is rejected when either the candidate or its negation is a
 * key of the <code>activeTx</code> map. Rejecting the negation keeps the
 * absolute values of the assigned identifiers unique, which is what the
 * <code>startTimeIndex</code> uses as its keys. When every candidate is in
 * use, the method awaits the <code>txDeactivate</code> condition and then
 * scans again.
 * <p>
 * NOTE(review): <code>txDeactivate</code> is signalled while holding the
 * service lock, so the caller presumably must hold that lock when invoking
 * this method - confirm against the declaration of the condition.
 *
 * @param commitTime
 *            The commit time for the commit point on which the tx will read
 *            (this must be the exact timestamp associated with the desired
 *            commit point).
 * @param nextCommitTime
 *            The commit time for the successor of that commit point.
 * @param timeout
 *            The maximum length of time to await an available timestamp.
 * @param unit
 *            The unit in which timeout is expressed.
 *
 * @return A timestamp <code>t</code> with
 *         <code>commitTime &lt;= t &lt; nextCommitTime</code> such that
 *         neither <code>t</code> nor <code>-t</code> identifies an active
 *         transaction.
 *
 * @throws InterruptedException
 *             if interrupted while awaiting the deactivation of a tx.
 * @throws TimeoutException
 *             if no timestamp became available within the timeout (this
 *             includes the case where the timeout is negative).
 */
protected long findUnusedTimestamp(final long commitTime,
        final long nextCommitTime, final long timeout, final TimeUnit unit)
        throws InterruptedException, TimeoutException {

    final long begin = System.nanoTime();

    final long nanos = unit.toNanos(timeout);

    // Each pass scans the range once and then waits for a tx to terminate.
    for (long remaining = nanos; remaining >= 0; //
            remaining = nanos - (System.nanoTime() - begin)) {

        long candidate = commitTime;

        while (candidate < nextCommitTime) {

            /*
             * Note: We do not accept an active read-only startTime, nor a
             * start time that corresponds to the absolute value of an
             * active read-write transaction.
             *
             * @todo We could grab the timestamp using an atomic
             * putIfAbsent and a special value and then replace the value
             * with the desired one (or just construct the TxState object
             * each time and discard it if the map contains that key). This
             * might let us increase concurrency for newTx().
             */
            final boolean inUse = activeTx.containsKey(candidate)
                    || activeTx.containsKey(-candidate);

            if (!inUse) {

                return candidate;

            }

            candidate++;

        }

        /*
         * Wait for a tx to terminate. If it is in the desired half-open
         * range it will be detected by the next scan.
         *
         * Note: This requires that the signaller use signalAll() since we
         * could be waiting on more than one half-open range.
         *
         * @todo if we used a Condition for the half-open range then we
         * could signal exactly that condition.
         *
         * Note: throws InterruptedException
         */
        final long waitNanos = nanos - (System.nanoTime() - begin);

        if (!txDeactivate.await(waitNanos, TimeUnit.NANOSECONDS)) {

            throw new TimeoutException();

        }

    }

    // The deadline expired (or the timeout was negative on entry).
    throw new TimeoutException();

}
/**
* Return the last commit time.
* <p>
* Note: Declared abstract so that we can hide the {@link IOException}.
*
* @return The last commit time as reported by the concrete implementation.
*/
@Override
abstract public long getLastCommitTime();
/**
* Implementation must abort the tx on the journal (standalone) or on each
* data service (federation) on which it has written.
* <p>
* Pre-conditions:
* <ul>
* <li>The transaction is {@link RunState#Active}; and</li>
* <li>The caller holds the {@link TxState#lock}.</li>
* </ul>
* Post-conditions:
* <ul>
* <li>The transaction is {@link RunState#Aborted}; and</li>
* <li>The transaction write set has been discarded by each {@link Journal}
* or {@link IDataService} on which it has written (applicable for
* read-write transactions only).</li>
* </ul>
*
* @param state
* The transaction state as maintained by the transaction server.
*
* @throws Exception
* if the abort fails.
*/
abstract protected void abortImpl(final TxState state) throws Exception;
/**
* Implementation must either single-phase commit (standalone journal or a
* transaction that only writes on a single data service) or 2-/3-phase
* commit (distributed transaction running on a federation).
* <p>
* Pre-conditions:
* <ul>
* <li>The transaction is {@link RunState#Active}; and</li>
* <li>The caller holds the {@link TxState#lock}.</li>
* </ul>
* Post-conditions (success for read-only transaction or a read-write
* transaction with an empty write set):
* <ul>
* <li>The transaction is {@link RunState#Committed}; and</li>
* <li>The returned commitTime is ZERO (0L).</li>
* </ul>
* Post-conditions (success for read-write transaction with a non-empty
* write set):
* <ul>
* <li>The transaction is {@link RunState#Committed};</li>
* <li>The transaction write set has been made restart-safe by each
* {@link Journal} or {@link IDataService} on which it has written
* (applicable for read-write transactions only); and</li>
* <li>The application can read exactly the data written by the transaction
* from the commit point identified by the returned commitTime.</li>
* </ul>
* Post-conditions (failure):
* <ul>
* <li>The transaction is {@link RunState#Aborted}; and</li>
* <li>The transaction write set has been discarded by each {@link Journal}
* or {@link IDataService} on which it has written (applicable for
* read-write transactions only).</li>
* </ul>
*
* @param state
* The transaction state as maintained by the transaction server.
*
* @return The commit time for the transaction -or- ZERO (0L) if the
* transaction was read-only or had an empty write set.
*
* @throws Exception
* if something else goes wrong. This will be (or will wrap) a
* {@link ValidationError} if validation fails.
*/
abstract protected long commitImpl(final TxState state) throws Exception;
/**
* Abort the transaction.
* <p>
* The transaction is aborted via {@link #abortImpl(TxState)} while holding
* the per-transaction lock. Any {@link Throwable} raised by the abort
* (including a failed assertion) is logged and is NOT rethrown. The
* transaction is then deactivated, the release time is updated, and all
* threads awaiting the deactivation of a transaction are signalled.
*
* @param tx
* The transaction identifier.
*
* @throws IllegalStateException
* if the service is neither running nor shutting down.
* @throws IllegalStateException
* if there is no such active transaction.
* @throws IllegalStateException
* if the transaction is known but is no longer active.
*/
@Override
public void abort(final long tx) {
setupLoggingContext();
try {
// The request is only accepted in these service run states.
switch (runState) {
case Running:
case Shutdown:
break;
default:
throw new IllegalStateException(ERR_SERVICE_NOT_AVAIL);
}
final TxState state = activeTx.get(tx);
if (state == null)
throw new IllegalStateException(ERR_NO_SUCH);
// Set iff we observed the tx as active while holding its lock. The
// clean up below only runs in that case.
boolean wasActive = false;
state.lock.lock();
try {
if (!state.isActive()) {
throw new IllegalStateException(ERR_NOT_ACTIVE);
}
wasActive = true;
try {
abortImpl(state);
assert state.isAborted() : state.toString();
} catch (Throwable t) {
// Best effort: the failure is logged and the tx is still
// deactivated by the finally clause below.
log.error(state.toString(),t);
}
} finally {
try {
if (wasActive) {
// Done while still holding the per-transaction lock.
deactivateTx(state);
}
} finally {
/*
* Note: This avoids a lock ordering problem by releasing
* the inner lock (state.lock) before acquiring the outer
* lock.
*/
state.lock.unlock();
if (wasActive) {
lock.lock();
try {
updateReleaseTime(Math.abs(state.tx), state/*deactivatedTx*/);
/*
* Note: signalAll() is required. See code that
* searches the half-open range for a
* read-historical timestamp. It waits on this
* signal, but there can be more than one request
* waiting and requests can be waiting on different
* half-open ranges.
*/
txDeactivate.signalAll();
} finally {
lock.unlock();
}
}
}
}
} finally {
clearLoggingContext();
}
}
/**
* Commit the transaction.
* <p>
* The transaction is committed via {@link #commitImpl(TxState)} while
* holding the per-transaction lock. Whether or not the commit succeeds, the
* transaction is then deactivated, the release time is updated, and all
* threads awaiting the deactivation of a transaction are signalled.
*
* @param tx
* The transaction identifier.
*
* @return The value returned by {@link #commitImpl(TxState)}.
*
* @throws ValidationError
* if a {@link ValidationError} is an inner cause of the failure.
* Note that a new instance is thrown which does not carry the
* original exception as its cause.
* @throws IllegalStateException
* if the service is neither running nor shutting down.
* @throws IllegalStateException
* if there is no such active transaction.
* @throws IllegalStateException
* if the transaction is known but is no longer active.
* @throws RuntimeException
* wrapping any other failure of the commit (the failure is also
* logged).
*/
@Override
public long commit(final long tx) throws ValidationError {
setupLoggingContext();
try {
// The request is only accepted in these service run states.
switch (runState) {
case Running:
case Shutdown:
break;
default:
throw new IllegalStateException(ERR_SERVICE_NOT_AVAIL);
}
final TxState state = activeTx.get(tx);
if (state == null) {
throw new IllegalStateException(ERR_NO_SUCH);
}
// Set iff we observed the tx as active while holding its lock. The
// clean up below only runs in that case.
boolean wasActive = false;
state.lock.lock();
try {
if (!state.isActive()) {
throw new IllegalStateException(ERR_NOT_ACTIVE);
}
wasActive = true;
try {
final long commitTime = commitImpl(state);
assert state.isCommitted() : "tx=" + state;
return commitTime;
} catch (Throwable t2) {
// A failed commit is expected to leave the tx aborted.
assert state.isAborted() : "ex=" + t2 + ", tx=" + state;
if (InnerCause.isInnerCause(t2, ValidationError.class)) {
// Validation failures are reported without logging an error.
throw new ValidationError();
}
log.error(t2.getMessage(), t2);
throw new RuntimeException(t2);
}
} finally {
try {
if (wasActive) {
// Done while still holding the per-transaction lock.
deactivateTx(state);
}
} finally {
/*
* Note: This avoids a lock ordering problem by releasing
* the inner lock (state.lock) before acquiring the outer
* lock.
*/
state.lock.unlock();
if (wasActive) {
lock.lock();
try {
updateReleaseTime(Math.abs(state.tx),state/*deactivatedTx*/);
/*
* Note: signalAll() is required. See code that
* searches the half-open range for a
* read-historical timestamp. It waits on this
* signal, but there can be more than one request
* waiting and requests can be waiting on different
* half-open ranges.
*/
txDeactivate.signalAll();
} finally {
lock.unlock();
}
}
}
}
} finally {
clearLoggingContext();
}
}
/**
* Transaction state as maintained by the {@link ITransactionService}.
*
* Note: The commitTime and revisionTime are requested by the local
* transaction manager for single phase commits, which means that this class
* could only know their values for a distributed transaction commit. Hence
* they are not represented here.
*/
public class TxState implements ITxState {
/**
* The transaction identifier.
*/
public final long tx;
/**
* The commit time associated with the commit point against which this
* transaction will read. This will be 0
IFF there are no
* commit points yet. Otherwise it is a real commit time associated with
* some existing commit point.
*/
private final long readsOnCommitTime;
/**
* true
iff the transaction is read-only.
*/
private final boolean readOnly;
/**
* The run state of the transaction
*
* Note: This field is guarded by the {@link #lock}. It is [volatile] to
* make the state visible using a volatile write for the methods on the
* {@link ITxState} interface (isActive(), etc).
*/
private volatile RunState runState = RunState.Active;
/**
* Change the {@link RunState}.
*
* @param newval
* The new {@link RunState}.
*
* @throws IllegalArgumentException
* if the argument is null
.
* @throws IllegalStateException
* if the state transition is not allowed.
*
* @see RunState#isTransitionAllowed(RunState)
*/
public void setRunState(final RunState newval) {
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (newval == null)
throw new IllegalArgumentException();
if (!runState.isTransitionAllowed(newval)) {
throw new IllegalStateException("runState=" + runState
+ ", newValue=" + newval);
}
this.runState = newval;
}
@Override
final public long getStartTimestamp() {
return tx;
}
@Override
final public long getReadsOnCommitTime() {
return readsOnCommitTime;
}
/**
* The commit time assigned to a distributed read-write transaction
* during the commit protocol and otherwise ZERO (0L).
*
* Note: This field is guarded by the {@link #lock}.
*/
private long commitTime = 0L;
/**
* The commit time assigned to a distributed read-write transaction
* during the commit protocol.
*
* @return The assigned commit time.
*
* @throws IllegalStateException
* if the commit time has not been assigned.
*/
public long getCommitTime() {
if (!lock.isHeldByCurrentThread()) {
throw new IllegalMonitorStateException();
}
if (commitTime == 0L) {
throw new IllegalStateException();
}
return commitTime;
}
/**
* Sets the assigned commit time.
*
* @param commitTime
* The assigned commit time.
*/
protected void setCommitTime(final long commitTime) {
if (!lock.isHeldByCurrentThread()) {
throw new IllegalMonitorStateException();
}
if (commitTime == 0L) {
throw new IllegalArgumentException();
}
if (this.commitTime != 0L) {
throw new IllegalStateException();
}
this.commitTime = commitTime;
}
/**
* The set of {@link DataService}s on which a read-write transaction
* has been started and null
if this is not a read-write
* transaction.
*
* Note: We only track this information for a distributed database.
*/
private final Set dataServices;
/**
* The set of named resources that the transaction has declared across
* all {@link IDataService}s on which it has written and
* null
if this is not a read-write transaction.
*
* Note: We only track this information for a distributed database.
*/
private final Set resources;
/**
* Return the resources declared by the transaction.
*/
public String[] getResources() {
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (resources == null)
return EMPTY;
return resources.toArray(new String[] {});
}
/**
* Return true
iff the dataService identified by the
* {@link UUID} is one on which this transaction has been started.
*
* @param dataServiceUUID
* The {@link UUID} identifying an {@link IDataService}.
*
* @return true
if this transaction has been started on
* that {@link IDataService}. false
for
* read-only transactions.
*/
public boolean isStartedOn(final UUID dataServiceUUID) {
if(!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (dataServiceUUID == null)
throw new IllegalArgumentException();
if (dataServices == null)
return false;
return dataServices.contains(dataServiceUUID);
}
/**
* The set of {@link DataService}s on which the transaction has
* written.
*
* @throws IllegalStateException
* if not a read-write transaction.
*/
protected UUID[] getDataServiceUUIDs() {
if(!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (dataServices == null)
throw new IllegalStateException();
return dataServices.toArray(new UUID[] {});
}
/**
* A per-transaction lock used to serialize operations on a given
* transaction. You need to hold this lock for most of the operations on
* this class, including any access to the {@link RunState}.
*
* Note: DO NOT attempt to acquire the outer
* {@link AbstractTransactionService#lock} if you are already holding
* this {@link #lock}. This is a lock ordering problem and can result in
* a deadlock.
*/
final protected ReentrantLock lock = new ReentrantLock();
/**
*
* @param tx
* The assigned transaction identifier.
* @param readCommitTime
* The commit time associated with the commit point against
* which this transaction will read (may be ZERO if there are
* no commit points, must not be negative).
*/
protected TxState(final long tx, final long readCommitTime) {
if (tx == ITx.UNISOLATED)
throw new IllegalArgumentException();
if (tx == ITx.READ_COMMITTED)
throw new IllegalArgumentException();
if (readCommitTime < 0)
throw new IllegalArgumentException();
this.tx = tx;
this.readsOnCommitTime = readCommitTime;
this.readOnly = TimestampUtility.isReadOnly(tx);
// pre-compute the hash code for the transaction.
this.hashCode = Long.valueOf(tx).hashCode();
this.dataServices = readOnly ? null : new LinkedHashSet();
this.resources = readOnly ? null : new LinkedHashSet();
}
/**
* The hash code is based on the {@link #getStartTimestamp()}.
*/
@Override
final public int hashCode() {
return hashCode;
}
private final int hashCode;
/**
* True iff they are the same object or have the same start timestamp.
*
* @param o
* Another transaction object.
*/
@Override
final public boolean equals(final Object o) {
if (this == o)
return true;
if (!(o instanceof ITx))
return false;
final ITx t = (ITx) o;
return tx == t.getStartTimestamp();
}
/**
* Declares resources on a data service instance on which the
* transaction will write.
*
* @param dataService
* The data service identifier.
* @param resource
* An array of named resources on the data service on which
* the transaction will write (or at least for which it
* requires an exclusive write lock).
*
* @throws IllegalStateException
* if the transaction is read-only.
* @throws IllegalStateException
* if the transaction is not active.
*/
final public void declareResources(final UUID dataService,
final String[] resource) {
if (dataService == null)
throw new IllegalArgumentException();
if (resource == null)
throw new IllegalArgumentException();
if (!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if (readOnly)
throw new IllegalStateException(ERR_READ_ONLY);
if (!isActive())
throw new IllegalStateException(ERR_NOT_ACTIVE);
dataServices.add(dataService);
// Note: sufficient to prevent deadlocks when there are shared indices.
resources.addAll(Arrays.asList(resource));
if (log.isInfoEnabled())
log.info("dataService=" + dataService + ", resource="
+ Arrays.toString(resource));
}
// /**
// * Return true
if the transaction is read-only or if a
// * read-write transaction has not been started on any
// * {@link IDataService}s.
// *
// * WARNING: This method should only be used for distributed
// * databases. It will always report [false] for a standalone database
// * since
// * {@link ITransactionService#declareResources(long, UUID, String[])} is
// * not invoked for a standalone database!
// */
// final public boolean isEmptyWriteSet() {
//
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
//
// return readOnly || dataServices.isEmpty();
//
// }
/**
* Return the #of {@link IDataService}s on which a read-write
* transaction has executed an operation.
*
* @return The #of {@link IDataService}.
*
* @throws IllegalStateException
* if the transaction is read-only.
* @throws IllegalMonitorStateException
* if the caller does not hold the lock.
*/
final public int getDataServiceCount() {
if(!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
if(readOnly)
throw new IllegalStateException(ERR_READ_ONLY);
return dataServices.size();
}
/**
* Return true
iff a read-write transaction has started on
* more than one {@link IDataService}.
*/
final boolean isDistributedTx() {
if(!lock.isHeldByCurrentThread())
throw new IllegalMonitorStateException();
return !readOnly && dataServices.size() > 1;
}
/**
* Returns a string representation of the transaction state.
*/
@Override
final public String toString() {
/*
* Note: info reported here MUST be safe and MUST NOT require a
* lock!
*/
return "GlobalTxState{tx=" + tx + ",readsOnCommitTime="
+ readsOnCommitTime + ",readOnly=" + readOnly
+ ",runState=" + runState + "}";
}
@Override
final public boolean isReadOnly() {
return readOnly;
}
@Override
final public boolean isActive() {
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
// volatile read.
return runState == RunState.Active;
}
@Override
final public boolean isPrepared() {
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
// volatile read.
return runState == RunState.Prepared;
}
@Override
final public boolean isComplete() {
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
// volatile read.
final RunState tmp = runState;
return tmp == RunState.Committed || tmp == RunState.Aborted;
}
@Override
final public boolean isCommitted() {
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
// volatile read.
return runState == RunState.Committed;
}
@Override
final public boolean isAborted() {
// if(!lock.isHeldByCurrentThread())
// throw new IllegalMonitorStateException();
// volatile read.
return runState == RunState.Aborted;
}
}
/**
 * Start the service.
 * <p>
 * While holding the service lock this verifies that the service is in the
 * {@link TxServiceRunState#Starting} state and that the clock does not
 * report a time before {@link #getLastCommitTime()}. It then computes the
 * initial release time and changes the {@link TxServiceRunState} to
 * {@link TxServiceRunState#Running}.
 *
 * @return This service.
 *
 * @throws IllegalStateException
 *             if the service is not in the
 *             {@link TxServiceRunState#Starting} state.
 * @throws RuntimeException
 *             if the clock reports a timestamp before the last commit
 *             time.
 */
@Override
public AbstractTransactionService start() {

    if (log.isInfoEnabled())
        log.info("");

    lock.lock();

    try {

        if (getRunState() != TxServiceRunState.Starting) {

            // May only be started once, from the initial state.
            throw new IllegalStateException();

        }

        final long now = _nextTimestamp();

        final long lastCommitTime = getLastCommitTime();

        if (now < lastCommitTime) {

            throw new RuntimeException(
                    "Clock reporting timestamps before lastCommitTime: now="
                            + new Date(now) + ", lastCommitTime="
                            + new Date(lastCommitTime));

        }

        /*
         * Note: This computes the releaseTime on startup.
         *
         * Note: While [now] is not really a transaction identifier, it is
         * LT any possible transaction identifier (since there are no
         * running transactions).
         */
        updateReleaseTime(now, null/* deactivatedTx */);

        setRunState(TxServiceRunState.Running);

        return this;

    } finally {

        lock.unlock();

    }

}
/**
* Return the service interface.
*
* @return Always {@link ITransactionService}.
*/
@Override
@SuppressWarnings("rawtypes")
public Class getServiceIface() {
return ITransactionService.class;
}
/**
* A shared empty array. It is returned by {@link TxState#getResources()}
* when the transaction does not have a set of declared resources.
*/
private static transient final String[] EMPTY = new String[0];
/**
 * Return the {@link CounterSet}.
 * <p>
 * A new {@link CounterSet} is built on each call. Every counter is backed
 * by an {@link Instrument} which reads the current value from this service
 * when it is sampled.
 *
 * @return The counters for this service.
 */
public CounterSet getCounters() {

    final CounterSet root = new CounterSet();

    // The run state of the service.
    root.addCounter("runState", new Instrument() {
        @Override
        protected void sample() {
            setValue(runState.toString());
        }
    });

    // The #of active transactions.
    root.addCounter("#active", new Instrument() {
        @Override
        protected void sample() {
            setValue(getActiveCount());
        }
    });

    // The last commit time.
    root.addCounter("lastCommitTime", new Instrument() {
        @Override
        protected void sample() {
            setValue(getLastCommitTime());
        }
    });

    // The minimum #of milliseconds of history that must be preserved.
    root.addCounter("minReleaseAge", new Instrument() {
        @Override
        protected void sample() {
            setValue(getMinReleaseAge());
        }
    });

    // The current release time.
    root.addCounter("releaseTime", new Instrument() {
        @Override
        protected void sample() {
            setValue(getReleaseTime());
        }
    });

    // Start / abort / commit counts.
    root.addCounter("startCount", new Instrument() {
        @Override
        protected void sample() {
            setValue(getStartCount());
        }
    });

    root.addCounter("abortCount", new Instrument() {
        @Override
        protected void sample() {
            setValue(getAbortCount());
        }
    });

    root.addCounter("commitCount", new Instrument() {
        @Override
        protected void sample() {
            setValue(getCommitCount());
        }
    });

    // Active counts by transaction type.
    root.addCounter("readOnlyActiveCount", new Instrument() {
        @Override
        protected void sample() {
            setValue(getReadOnlyActiveCount());
        }
    });

    root.addCounter("readWriteActiveCount", new Instrument() {
        @Override
        protected void sample() {
            setValue(getReadWriteActiveCount());
        }
    });

    /*
     * Reports the commit time on which the earliest open transaction is
     * reading.
     *
     * Note: This is NOT the txId of that transaction. No value is set by
     * the sample when there is no open transaction.
     */
    root.addCounter("earliestReadsOnCommitTime",
            new Instrument() {
                @Override
                protected void sample() {
                    // volatile read (once).
                    final TxState earliest = earliestOpenTx;
                    if (earliest == null) {
                        return;
                    }
                    setValue(earliest.readsOnCommitTime);
                }
            });

    return root;

}
}