All Downloads are FREE. Search and download functionalities are using the official Maven repository.

bitronix.tm.recovery.Recoverer Maven / Gradle / Ivy

There is a newer version: 62
Show newest version
/*
 * Copyright (C) 2006-2013 Bitronix Software (http://www.bitronix.be)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package bitronix.tm.recovery;

import bitronix.tm.BitronixXid;
import bitronix.tm.TransactionManagerServices;
import bitronix.tm.internal.LogDebugCheck;
import bitronix.tm.internal.XAResourceHolderState;
import bitronix.tm.journal.JournalRecord;
import bitronix.tm.journal.TransactionLogRecord;
import bitronix.tm.resource.ResourceLoader;
import bitronix.tm.resource.ResourceRegistrar;
import bitronix.tm.resource.common.XAResourceProducer;
import bitronix.tm.utils.Decoder;
import bitronix.tm.utils.ManagementRegistrar;
import bitronix.tm.utils.Service;
import bitronix.tm.utils.Uid;

import javax.transaction.Status;
import javax.transaction.xa.XAException;
import javax.transaction.xa.XAResource;
import javax.transaction.xa.Xid;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;

/**
 * Recovery process implementation. Here is Mike Spille's description of XA recovery:
 * 

* Straight Line Recovery: *

    *
  • 1. Find transactions that the TM considers dangling and unresolved
  • *
  • 2. Find and reconstitute any {@link XAResource}s which were being used when chunk blowing occured.
  • *
  • 3. Call the recover() method on each of these {@link XAResource}s.
  • *
  • 4. Throw out any {@link Xid}'s in the {@link XAResource}' recover lists which are not owned by this TM.
  • *
  • 5. Correlate {@link Xid}'s that the TM knows about with remaining {@link Xid}'s that the {@link XAResource}s * reported.
  • *
  • 6. For {@link XAResource} {@link Xid}'s that match the global transaction ID which the TM found dangling with * a "Committing..." record, call commit() on those {@link XAResource}s for those {@link Xid}s.
  • *
  • 7. For {@link XAResource} {@link Xid}'s that do not match any dangling "Committing..." records, call * rollback() on those {@link XAResource}s for those {@link Xid}s.
  • *
* Exceptional conditions: *
    *
  • 1. For any rollback() calls from step 6 which reported a Heuristic Commit, you are in danger or * doubt, so run in circles, scream and shout.
  • *
  • 2. For any commit() calls from step 7 which reported a Heuristic Rollback, you are in danger or * doubt, so run in circles, scream and shout.
  • *
  • 3. For any resource you can't reconstitute in in step #2, or who fails on recover in step #3, or who reports * anything like an XAER_RMFAILURE in step 6 or step 7, keep trying to contact them in some implementation * defined manner.
  • *
  • 4. For any heuristic outcome you see reported from an XAResource, call forget() for that * {@link XAResource}/{@link Xid} pair so that the resource can stop holding onto a reference to that transaction
  • *
*

To achieve this, {@link Recoverer} must have access to all previously used resources, even if the journal contains * no trace of some of them. There are two ways of achieving this: either you use the {@link ResourceLoader} to configure * all your resources and everything will be working automatically or by making sure resources are re-created and re-registered.

*

Those are the three steps of the Bitronix implementation: *

    *
  • call recover() on all known resources (Mike's steps 1 to 5)
  • *
  • commit dangling COMMITTING transactions (Mike's step 6)
  • *
  • rollback any remaining recovered transaction (Mike's step 7)
  • *
* * @author Ludovic Orban */ public class Recoverer implements Runnable, Service, RecovererMBean { private static final java.util.logging.Logger log = java.util.logging.Logger.getLogger(Recoverer.class.toString()); private final Map registeredResources = new HashMap<>(); private final Map> recoveredXidSets = new HashMap<>(); private final AtomicBoolean isRunning = new AtomicBoolean(false); private final String jmxName; private volatile Exception completionException; private volatile int committedCount; private volatile int rolledbackCount; private volatile int executionsCount; /** * Constructor Recoverer creates a new Recoverer instance. */ public Recoverer() { String serverId = TransactionManagerServices.getConfiguration() .getServerId(); if (serverId == null) { serverId = ""; } this.jmxName = "bitronix.tm:type=Recoverer,ServerId=" + ManagementRegistrar.makeValidName(serverId); ManagementRegistrar.register(jmxName, this); } /** * Run the recovery process. This method is automatically called by the transaction manager, you should never * call it manually. */ @Override public void run() { if (!isRunning.compareAndSet(false, true)) { log.info("recoverer is already running, abandoning this recovery request"); return; } try { committedCount = 0; rolledbackCount = 0; long oldestTransactionTimestamp = Long.MAX_VALUE; // Collect dangling records from journal, must run before oldestTransactionTimestamp is calculated Map danglingRecords = TransactionManagerServices.getJournal() .collectDanglingRecords(); // Query resources from ResourceRegistrar synchronized (ResourceRegistrar.class) { for (String name : ResourceRegistrar.getResourcesUniqueNames()) { registeredResources.put(name, ResourceRegistrar.get(name)); } if (TransactionManagerServices.isTransactionManagerRunning()) { oldestTransactionTimestamp = TransactionManagerServices.getTransactionManager() .getOldestInFlightTransactionTimestamp(); } } // 1. call recover on all known resources recoverAllResources(); // 2. commit dangling COMMITTING transactions Set committedGtrids = commitDanglingTransactions(oldestTransactionTimestamp, danglingRecords); committedCount = committedGtrids.size(); // 3. rollback any remaining recovered transaction rolledbackCount = rollbackAbortedTransactions(oldestTransactionTimestamp, committedGtrids); if (executionsCount == 0 || committedCount > 0 || rolledbackCount > 0) { log.info("recovery committed " + committedCount + " dangling transaction(s) and rolled back " + rolledbackCount + " aborted transaction(s) on " + registeredResources.size() + " resource(s) [" + getRegisteredResourcesUniqueNames() + "]" + ((TransactionManagerServices.getConfiguration() .isCurrentNodeOnlyRecovery()) ? " (restricted to serverId '" + TransactionManagerServices.getConfiguration() .getServerId() + "')" : "")); } else if (LogDebugCheck.isDebugEnabled()) { log.finer("recovery committed " + committedCount + " dangling transaction(s) and rolled back " + rolledbackCount + " aborted transaction(s) on " + registeredResources.size() + " resource(s) [" + getRegisteredResourcesUniqueNames() + "]" + ((TransactionManagerServices.getConfiguration() .isCurrentNodeOnlyRecovery()) ? " (restricted to serverId '" + TransactionManagerServices.getConfiguration() .getServerId() + "')" : "")); } this.completionException = null; } catch (Exception ex) { this.completionException = ex; log.log(Level.WARNING, "recovery failed, registered resource(s): " + getRegisteredResourcesUniqueNames(), ex); } finally { recoveredXidSets.clear(); registeredResources.clear(); executionsCount++; isRunning.set(false); } } /** * Recover all configured resources and fill the recoveredXidSets with all recovered XIDs. * Step 1. */ private void recoverAllResources() { // a cloned registeredResources Map must be iterated as the original one can be modified in the loop for (Map.Entry entry : new HashMap<>(registeredResources).entrySet()) { String uniqueName = entry.getKey(); XAResourceProducer producer = entry.getValue(); try { if (LogDebugCheck.isDebugEnabled()) { log.finer("performing recovery on " + uniqueName); } Set xids = recover(producer); if (LogDebugCheck.isDebugEnabled()) { log.finer("recovered " + xids.size() + " XID(s) from resource " + uniqueName); } recoveredXidSets.put(uniqueName, xids); producer.setFailed(false); } catch (XAException ex) { producer.setFailed(true); registeredResources.remove(uniqueName); String extraErrorDetails = TransactionManagerServices.getExceptionAnalyzer() .extractExtraXAExceptionDetails(ex); log.log(Level.WARNING, "error running recovery on resource '" + uniqueName + "', resource marked as failed (background recoverer will retry recovery)" + " (error=" + Decoder.decodeXAExceptionErrorCode(ex) + ")" + (extraErrorDetails == null ? "" : ", extra error=" + extraErrorDetails), ex); } catch (Exception ex) { if (producer != null) { producer.setFailed(true); } registeredResources.remove(uniqueName); log.log(Level.WARNING, "error running recovery on resource '" + uniqueName + "', resource marked as failed (background recoverer will retry recovery)", ex); } } } /** * Commit transactions that have a dangling COMMITTING record in the journal. * Transactions younger than oldestTransactionTimestamp are ignored. * Step 2. * * @param oldestTransactionTimestamp * the timestamp of the oldest transaction still in-flight. * @param danglingRecords * a Map using Uid objects GTRID as key and {@link TransactionLogRecord} as value. * * @return a Set of all committed GTRIDs encoded as strings. * * @throws java.io.IOException * if there is an I/O error reading the journal. * @throws RecoveryException * if an error preventing recovery happened. */ private Set commitDanglingTransactions(long oldestTransactionTimestamp, Map danglingRecords) throws IOException, RecoveryException { Set committedGtrids = new HashSet<>(); if (LogDebugCheck.isDebugEnabled()) { log.finer("found " + danglingRecords.size() + " dangling record(s) in journal"); } Iterator> it = danglingRecords.entrySet() .iterator(); while (it.hasNext()) { Entry entry = it.next(); Uid gtrid = entry.getKey(); JournalRecord tlog = entry.getValue(); Set uniqueNames = tlog.getUniqueNames(); Set danglingTransactions = getDanglingTransactionsInRecoveredXids(uniqueNames, tlog.getGtrid()); long txTimestamp = gtrid.extractTimestamp(); if (LogDebugCheck.isDebugEnabled()) { log.finer("recovered XID timestamp: " + txTimestamp + " - oldest in-flight TX timestamp: " + oldestTransactionTimestamp); } if (txTimestamp < oldestTransactionTimestamp) { if (LogDebugCheck.isDebugEnabled()) { log.finer("committing dangling transaction with GTRID " + gtrid); } commit(danglingTransactions); if (LogDebugCheck.isDebugEnabled()) { log.finer("committed dangling transaction with GTRID " + gtrid); } committedGtrids.add(gtrid); Set participatingUniqueNames = filterParticipatingUniqueNamesInRecoveredXids(uniqueNames); if (!participatingUniqueNames.isEmpty()) { if (LogDebugCheck.isDebugEnabled()) { log.finer( "updating journal's transaction with GTRID " + gtrid + " status to COMMITTED for names [" + buildUniqueNamesString(participatingUniqueNames) + "]"); } TransactionManagerServices.getJournal() .log(Status.STATUS_COMMITTED, tlog.getGtrid(), participatingUniqueNames); } else { if (LogDebugCheck.isDebugEnabled()) { log.finer("not updating journal's transaction with GTRID " + gtrid + " status to COMMITTED as no resource could be found (incremental recovery will need to clean this)"); } committedGtrids.remove(gtrid); } } else { if (LogDebugCheck.isDebugEnabled()) { log.finer("skipping in-flight transaction with GTRID " + gtrid); } } } if (LogDebugCheck.isDebugEnabled()) { log.finer("committed " + committedGtrids.size() + " dangling transaction(s)"); } return committedGtrids; } /** * Rollback branches whose {@link Xid} has been recovered on the resource but hasn't been committed. * Those are the 'aborted' transactions of the Presumed Abort protocol. * Step 3. * * @param oldestTransactionTimestamp * the timestamp of the oldest transaction still in-flight. * @param committedGtrids * a set of {@link Uid}s already committed on this resource. * * @return the rolled back branches count. * * @throws RecoveryException * if an error preventing recovery happened. */ private int rollbackAbortedTransactions(long oldestTransactionTimestamp, Set committedGtrids) throws RecoveryException { if (LogDebugCheck.isDebugEnabled()) { log.finer("rolling back aborted branch(es)"); } int rollbackCount = 0; for (Map.Entry> entry : recoveredXidSets.entrySet()) { String uniqueName = entry.getKey(); Set recoveredXids = entry.getValue(); if (LogDebugCheck.isDebugEnabled()) { log.finer("checking " + recoveredXids.size() + " branch(es) on " + uniqueName + " for rollback"); } int count = rollbackAbortedBranchesOfResource(oldestTransactionTimestamp, uniqueName, recoveredXids, committedGtrids); if (LogDebugCheck.isDebugEnabled()) { log.finer("checked " + recoveredXids.size() + " branch(es) on " + uniqueName + " for rollback"); } rollbackCount += count; } if (LogDebugCheck.isDebugEnabled()) { log.finer("rolled back " + rollbackCount + " aborted branch(es)"); } return rollbackCount; } /** * Build a string with comma-separated resources unique names. * * @return the string. */ private String getRegisteredResourcesUniqueNames() { return buildUniqueNamesString(registeredResources.keySet()); } /** * Run the recovery process on the target resource. * Step 1. * * @param producer * the {@link XAResourceProducer} to recover. * * @return a Set of BitronixXids. * * @throws javax.transaction.xa.XAException * if {@link XAResource#recover(int)} call fails. * @throws RecoveryException * if an error preventing recovery happened. */ private Set recover(XAResourceProducer producer) throws XAException, RecoveryException { if (producer == null) { throw new IllegalArgumentException("recoverable resource cannot be null"); } try { if (LogDebugCheck.isDebugEnabled()) { log.finer("running recovery on " + producer); } XAResourceHolderState xaResourceHolderState = producer.startRecovery(); return RecoveryHelper.recover(xaResourceHolderState); } finally { producer.endRecovery(); } } /** * Return {@link DanglingTransaction}s with {@link Xid}s corresponding to the GTRID parameter found in resources * specified by their uniqueNames. * recoverAllResources must have been called before or else the returned list will always be empty. * Step 2. * * @param uniqueNames * a set of uniqueNames. * @param gtrid * the GTRID to look for. * * @return a set of {@link DanglingTransaction}s. */ private Set getDanglingTransactionsInRecoveredXids(Set uniqueNames, Uid gtrid) { Set danglingTransactions = new HashSet<>(); for (String uniqueName : uniqueNames) { if (LogDebugCheck.isDebugEnabled()) { log.finer("finding dangling transaction(s) in recovered XID(s) of resource " + uniqueName); } Set recoveredXids = recoveredXidSets.get(uniqueName); if (recoveredXids == null) { if (LogDebugCheck.isDebugEnabled()) { log.finer("resource " + uniqueName + " did not recover, skipping commit"); } continue; } for (BitronixXid recoveredXid : recoveredXids) { if (gtrid.equals(recoveredXid.getGlobalTransactionIdUid())) { if (LogDebugCheck.isDebugEnabled()) { log.finer("found a recovered XID matching dangling log's GTRID " + gtrid + " in resource " + uniqueName); } danglingTransactions.add(new DanglingTransaction(uniqueName, recoveredXid)); } } } return danglingTransactions; } /** * Commit all branches of a dangling transaction. * Step 2. * * @param danglingTransactions * a set of {@link DanglingTransaction}s to commit. * * @throws RecoveryException * if an error preventing recovery happened. */ private void commit(Set danglingTransactions) throws RecoveryException { if (LogDebugCheck.isDebugEnabled()) { log.finer(danglingTransactions.size() + " branch(es) to commit"); } for (DanglingTransaction danglingTransaction : danglingTransactions) { Xid xid = danglingTransaction.getXid(); String uniqueName = danglingTransaction.getUniqueName(); if (LogDebugCheck.isDebugEnabled()) { log.finer("committing branch with XID " + xid + " on " + uniqueName); } commit(uniqueName, xid); } } /** * Method filterParticipatingUniqueNamesInRecoveredXids ... * * @param uniqueNames * of type Set * * @return Set */ private Set filterParticipatingUniqueNamesInRecoveredXids(Set uniqueNames) { Set recoveredUniqueNames = new HashSet<>(); for (String uniqueName : uniqueNames) { if (LogDebugCheck.isDebugEnabled()) { log.finer("finding dangling transaction(s) in recovered XID(s) of resource " + uniqueName); } Set recoveredXids = recoveredXidSets.get(uniqueName); if (recoveredXids == null) { if (LogDebugCheck.isDebugEnabled()) { log.finer("cannot find resource '" + uniqueName + "' present in the journal, leaving it for incremental recovery"); } } else { recoveredUniqueNames.add(uniqueName); } } return recoveredUniqueNames; } /** * Method buildUniqueNamesString ... * * @param uniqueNames * of type Set * * @return String */ private static String buildUniqueNamesString(Set uniqueNames) { StringBuilder resourcesUniqueNames = new StringBuilder(); Iterator it = uniqueNames.iterator(); while (it.hasNext()) { String uniqueName = it.next(); resourcesUniqueNames.append(uniqueName); if (it.hasNext()) { resourcesUniqueNames.append(", "); } } return resourcesUniqueNames.toString(); } /** * Rollback aborted branches of the resource specified by uniqueName. * Step 3. * * @param oldestTransactionTimestamp * the timestamp of the oldest transaction still in-flight. * @param uniqueName * the unique name of the resource on which to rollback branches. * @param recoveredXids * a set of {@link BitronixXid} recovered on the reource. * @param committedGtrids * a set of {@link Uid}s already committed on the resource. * * @return the rolled back branches count. * * @throws RecoveryException * if an error preventing recovery happened. */ private int rollbackAbortedBranchesOfResource(long oldestTransactionTimestamp, String uniqueName, Set recoveredXids, Set committedGtrids) throws RecoveryException { int abortedCount = 0; for (BitronixXid recoveredXid : recoveredXids) { if (committedGtrids.contains(recoveredXid.getGlobalTransactionIdUid())) { if (LogDebugCheck.isDebugEnabled()) { log.finer("XID has been committed, skipping rollback: " + recoveredXid + " on " + uniqueName); } continue; } long txTimestamp = recoveredXid.getGlobalTransactionIdUid() .extractTimestamp(); if (LogDebugCheck.isDebugEnabled()) { log.finer("recovered XID timestamp: " + txTimestamp + " - oldest in-flight TX timestamp: " + oldestTransactionTimestamp); } if (txTimestamp >= oldestTransactionTimestamp) { if (LogDebugCheck.isDebugEnabled()) { log.finer("skipping XID of in-flight transaction: " + recoveredXid); } continue; } if (LogDebugCheck.isDebugEnabled()) { log.finer("rolling back in-doubt branch with XID " + recoveredXid + " on " + uniqueName); } boolean success = rollback(uniqueName, recoveredXid); if (success) { abortedCount++; } } return abortedCount; } /** * Commit the specified branch of a dangling transaction. * Step 2. * * @param uniqueName * the unique name of the resource on which the commit should be done. * @param xid * the {@link Xid} to commit. * * @return true when commit was successful. * * @throws RecoveryException * if an error preventing recovery happened. */ private boolean commit(String uniqueName, Xid xid) throws RecoveryException { XAResourceProducer producer = registeredResources.get(uniqueName); try { XAResourceHolderState xaResourceHolderState = producer.startRecovery(); return RecoveryHelper.commit(xaResourceHolderState, xid); } finally { producer.endRecovery(); } } /** * Rollback the specified branch of a dangling transaction. * Step 3. * * @param uniqueName * the unique name of the resource on which to rollback branches. * @param xid * the {@link Xid} to rollback. * * @return true when rollback was successful. * * @throws RecoveryException * if an error preventing recovery happened. */ private boolean rollback(String uniqueName, Xid xid) throws RecoveryException { XAResourceProducer producer = registeredResources.get(uniqueName); if (producer == null) { if (LogDebugCheck.isDebugEnabled()) { log.finer("resource " + uniqueName + " has not recovered, skipping rollback"); } return false; } try { XAResourceHolderState xaResourceHolderState = producer.startRecovery(); return RecoveryHelper.rollback(xaResourceHolderState, xid); } finally { producer.endRecovery(); } } /** * Shutdown the service and free all held resources. */ @Override public void shutdown() { ManagementRegistrar.unregister(jmxName); } /** * Get the amount of transactions committed during the last recovery run. * * @return the amount of committed transactions. */ @Override public int getCommittedCount() { return committedCount; } /** * Get the amount of transactions rolled back during the last recovery run. * * @return the amount of rolled back transactions. */ @Override public int getRolledbackCount() { return rolledbackCount; } /** * Get the exception reported when recovery failed. * * @return the exception that made recovery fail or null if last recovery execution was successful. */ @Override public Exception getCompletionException() { return completionException; } /** * Get how many times the recoverer has run since the transaction manager started. * * @return how many times the recoverer has run since the transaction manager started. */ @Override public int getExecutionsCount() { return executionsCount; } /** * Check if the recoverer currently is running. * * @return true if the recoverer currently is running, false otherwise. */ @Override public boolean isRunning() { return isRunning.get(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy