All Downloads are FREE. Search and download functionalities are using the official Maven repository.

oracle.kv.impl.rep.migration.MigrationService Maven / Gradle / Ivy

Go to download

NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.

The newest version!
/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */

package oracle.kv.impl.rep.migration;

import java.io.IOException;
import java.nio.channels.Channel;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;

import oracle.kv.KVVersion;
import oracle.kv.impl.rep.IncorrectRoutingException;
import oracle.kv.impl.rep.RepNode;
import oracle.kv.impl.rep.RepNodeService.Params;
import oracle.kv.impl.rep.migration.PartitionMigrations.MigrationRecord;
import oracle.kv.impl.rep.migration.PartitionMigrations.SourceRecord;
import oracle.kv.impl.rep.migration.TransferProtocol.TransferRequest;
import oracle.kv.impl.rep.migration.generation.PartitionMDException;
import oracle.kv.impl.test.TestHook;
import oracle.kv.impl.test.TestHookExecute;
import oracle.kv.impl.topo.PartitionId;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.util.KVThreadFactory;
import oracle.kv.impl.util.server.LoggerUtils;

import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.rep.RepInternal;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.impl.RepImpl;
import com.sleepycat.je.rep.net.DataChannel;
import com.sleepycat.je.rep.utilint.RepUtils;
import com.sleepycat.je.rep.utilint.ServiceDispatcher;
import com.sleepycat.je.rep.utilint.ServiceDispatcher.Response;

/**
 * Migration service. This object is registered with the JE service framework
 * and handles requests to migrate partitions from target nodes. When a
 * service request is received the details of the request are read from the
 * newly established channel and if valid, a MigrationSource thread is
 * started to handle the actual data movement.
 *
 * The initial request is the only message sent from the target to the source
 * node. After that all communication is from the source to the target.
 */
public class MigrationService implements Runnable {

    /* Name used to register with the JE service framework */
    public static final String SERVICE_NAME = "PartitionMigration";

    /* min store version to use partition generation db */
    private static final KVVersion MIN_STORE_VER_PART_GEN_DB = KVVersion.R18_3;
    private final Logger logger;

    /* Wait indefinitely for somebody to request the service. */
    private static final long POLL_TIMEOUT = Long.MAX_VALUE;

    private final RepNode repNode;

    private final Params params;

    /* The maximum number of target streams which can run concurrently. */
    private final int concurrentSourceLimit;

    final MigrationManager manager;

    private ThreadFactory sourceThreadFactory = null;

    /*
     * Queue for the JE service framework. Channels for incoming requests are
     * placed on this queue by the framework, and are pulled in this thread's
     * run method.
     */
    private final BlockingQueue queue =
                                new LinkedBlockingQueue<>();

    /* Maps the partition Id with the migration source */
    private final Map sourceMap = new HashMap<>();

    /* True if the service is accepting migration requests. */
    private volatile boolean enabled = false;

    /* Count of errors processing requests */
    private int requestErrors = 0;

    /* For unit tests */
    TestHook readHook;
    private TestHook> responseHook;

    MigrationService(RepNode repNode, MigrationManager manager, Params params) {
        this.repNode = repNode;
        this.manager = manager;
        this.params = params;
        concurrentSourceLimit =
                        params.getRepNodeParams().getConcurrentSourceLimit();
        logger = LoggerUtils.getLogger(this.getClass(), params);
    }

    synchronized void getStatus(HashSet status) {
        for (MigrationSource source : sourceMap.values()) {
            status.add(source.getStatus());
        }
    }

    synchronized PartitionMigrationStatus getStatus(PartitionId partitionId) {
        final MigrationSource source = sourceMap.get(partitionId);
        return (source == null) ? null : source.getStatus();
    }

    /**
     * Returns true if:
     * a) there are running sources
     * b) there are transfers which have completed sources (i.e reached #3 End
     * of Data in the ToO), but have not finished the entire ToO protocol.
     *
     * Guarding against (b) is necessary because it's possible for the ToO
     * operation to fail, causing the state of the source to revert back to
     * pre-transfer conditions, specifically the migrated partition is
     * re-instated on the source. Therefore it is important that no one else
     * change any state dependent on the migrated partition until completely
     * finished. [#24245]
     */
    synchronized boolean pendingSources() {
        if (getNumRunning() > 0) {
            return true;
        }

        /*
         * Though there are no sources running, we need to check if any
         * transfers have completed but the migration is still waiting
         * for ToO. These will appear as completed source records.
         */
        final PartitionMigrations migrations = manager.getMigrations();
        if (migrations == null) {
            return false;
        }
        final Iterator itr = migrations.completed();
        while (itr.hasNext()) {
            if (itr.next() instanceof SourceRecord) {
                return true;
            }
        }
        return false;
    }

    /**
     * Starts the service by registering with the JE service framework.
     */
    synchronized void start(ReplicatedEnvironment repEnv) {
        if (enabled) {
            throw new IllegalStateException("Service already started");
        }
        assert repEnv != null;

        final RepImpl repImpl = RepInternal.getRepImpl(repEnv);
        if (repImpl == null) {

            /*
             * Env was closed. A subsequent state transition when the
             * env is reopened, will register the dispatcher if necessary.
             */
            return;
        }
        final ServiceDispatcher dispatcher =
            repImpl.getRepNode().getServiceDispatcher();

        if (dispatcher.isRegistered(SERVICE_NAME)) {
            throw new IllegalStateException("Service already registered");
        }

        enabled = true;
        final Thread t = new KVThreadFactory(" migration service", logger).
                                                        newThread(this);
        dispatcher.register(dispatcher.new LazyQueuingService(SERVICE_NAME,
                                                              queue, t));
        logger.info("Migration service accepting requests.");
    }

    /**
     * Stops the service.
     *
     * @param shutdown true if the node is shutting down
     */
    synchronized void stop(boolean shutdown, boolean wait,
                           ReplicatedEnvironment repEnv) {
        assert repEnv != null;

        if (!enabled) {
            return;
        }
        enabled = false;

        /**
         * Since the rep node may be in an incomplete state during shutdown
         * do not attempt to cancel registration with the service dispatcher.
         */
        if (!shutdown) {
            final RepImpl repImpl = RepInternal.getRepImpl(repEnv);

            if (repImpl != null) {
                final ServiceDispatcher dispatcher =
                                    repImpl.getRepNode().getServiceDispatcher();

                if (dispatcher.isRegistered(SERVICE_NAME)) {
                    logger.log(Level.INFO, "Stopping {0}", this);

                    /* This will interrupt the service thread if needed */
                    dispatcher.cancel(SERVICE_NAME);
                }
            }
        }

        for (MigrationSource source : sourceMap.values()) {
            source.cancel(wait);
        }
        sourceMap.clear();
    }

    /**
     * Shuts down a source and waits for it to stop. This is used
     * when the admin needs to cleanup after a cancel or failure.
     *
     * @param partitionId
     * @param targetRGId
     */
    synchronized void cancel(PartitionId partitionId, RepGroupId targetRGId) {
        final MigrationSource source = sourceMap.get(partitionId);

        if ((source != null) &&
            (source.getTargetGroupId() == targetRGId.getGroupId())) {
            source.cancel(true);
            removeSource(partitionId);
        }
    }

    /**
     * Returns the migration source for the specified partition. If there
     * isn't a migration going on for that partition, null is returned.
     *
     * @param partitionId a partition ID
     * @return a migration source or null
     */
    synchronized MigrationSource getSource(PartitionId partitionId) {
        return sourceMap.get(partitionId);
    }

    synchronized void removeSource(PartitionId partitionId) {
        sourceMap.remove(partitionId);
    }

    @Override
    public void run() {

        /* This thread is run the first time a service request comes in for the
         * migration service. Once started it will remain running until the
         * service is unregistered or canceled. If unregistered the thread will
         * be interrupted.
         */
        logger.log(Level.INFO, "Migration service thread started.");

        try {
            while (enabled) {
                DataChannel channel = null;
                try {
                    channel = queue.poll(POLL_TIMEOUT, TimeUnit.MILLISECONDS);

                    if (channel == RepUtils.CHANNEL_EOF_MARKER) {
                        logger.info("EOF marker - shutdown");
                        return;
                    }

                    if (channel != null) {
                        processRequest(channel);
                    }
                } catch (IOException ioe) {
                    closeChannel(channel);
                    logger.log(Level.INFO,
                               "IOException processing migration request: ",
                               ioe);
                } catch (InterruptedException ie) {
                    logger.info("Migration service interrupted");
                    return;
                }
            }
        } finally {
            logger.info("Migration service thread exit");
        }
    }

    /**
     * Closes the specified channel, logging any resulting exceptions.
     *
     * @param channel a channel
     */
    private void closeChannel(Channel channel) {
        if (channel != null) {
            try {
                channel.close();
            } catch (IOException ioe) {
                logger.log(Level.WARNING, "Exception during cleanup", ioe);
            }
        }
    }

    /**
     * Processes the initial service request. The migration details are read
     * from the channel and if valid a migration source thread is created
     * and started.
     *
     * @param channel a channel
     * @throws IOException resulting from operations on the channel
     */
    private void processRequest(DataChannel channel) throws IOException {

        final TransferRequest request = TransferRequest.read(channel);

        final AtomicReference hookedResponse =
                new AtomicReference<>();
        assert TestHookExecute.doHookIfSet(responseHook, hookedResponse);

        if ((responseHook != null) && (hookedResponse.get() != null)) {
            final Response response = hookedResponse.get();
            if (response.equals(Response.BUSY)) {
                reportBusy(concurrentSourceLimit, "Test busy", channel);
            } else {
                reportError(response, "Test error: " + response, channel);
            }
            return;
        }

        final PartitionId partitionId = new PartitionId(request.partitionId);

        final RepNodeId targetRNId = request.targetRNId;
        final int targetGroupId = targetRNId.getGroupId();
        /* If no target, we are only transferring data, not migrating */
        final boolean transferOnly =
            targetRNId.equals(TransferProtocol.TRANSFER_ONLY_TARGET);

        /*
         * ensure the store is upgraded to minimal version, if not report
         * busy to target and the target may retry later.
         */
        if (!transferOnly && !checkForStoreVer(MIN_STORE_VER_PART_GEN_DB)) {
            final String err =  "Cannot migrate " + partitionId +
                                " because the store has not yet upgraded to " +
                                "the minimal version for partition generation" +
                                " db " + MIN_STORE_VER_PART_GEN_DB;
            reportBusy(concurrentSourceLimit, err, channel);
            logger.log(Level.FINE, () -> err);
            return;
        }

        /* Check to make sure the requested partition is here */
        try {
            repNode.getPartitionDB(partitionId);
        } catch (IncorrectRoutingException ire) {

            /*
             * If the request is for an unknown partition, it may be due to
             * completed transfer, in which case the source should reset
             * and let the target try again later.
             */
            if (!transferOnly && checkForRestart(partitionId, targetRNId)) {
                reportBusy(concurrentSourceLimit,
                           "Migration source resetting " + partitionId,
                           channel);
                return;
            }
            reportError(Response.UNKNOWN_SERVICE,
                        "Request for unknown: " + ire.getLocalizedMessage(),
                        channel);
            return;
        }

        /*
         * If the source doesn't know about the target rep group, report back
         * BUSY. Eventually the topology will be updated and we can proceed.
         * The check is necessary because once the migration is completed the
         * source needs to know about the target group in order to forward
         * requests there.
         */
        if (!transferOnly &&
            repNode.getTopology().get(new RepGroupId(targetGroupId)) == null) {
            reportBusy(0,
                       "Migration source needs updated topology, target " +
                       "group " + targetGroupId + " unknown", channel);
            return;
        }

        if (transferOnly) {
            logger.log(Level.FINE,
                       "Received transfer request for {0}",
                       new Object[]{partitionId});
        } else {
            logger.log(Level.FINE,
                       "Received migration request for {0} to {1}",
                       new Object[]{partitionId, targetGroupId});
        }

        synchronized (this) {

            if (!enabled || repNode.getTableManager().isBusyMaintenance()) {
                /* Report 0 streams until enabled */
                reportBusy(0,
                           "Migration source not enabled for " + partitionId,
                           channel);
                return;
            }

            /*
             * Limit the number of concurrent streams. The target will retry
             * until they can get in.
             */
            final int running = getNumRunning();
            if (running >= concurrentSourceLimit) {
                reportBusy(concurrentSourceLimit,
                           "Migration source busy. Number of streams= " +
                           running +
                           ", max= " + concurrentSourceLimit, channel);
                return;
            }

            MigrationSource source = sourceMap.get(partitionId);

            /*
             * If there is a source already running, try to cancel it and ask
             * the target to try again later.
             */
            if (source != null) {

                /*
                 * If the source is a transfer, cancel it regardless of
                 * the request.
                 *
                 * If the source is a migration, only cancel it if the
                 * request is also for a migration.
                 */
                if (source.isTransferOnly() || !transferOnly) {
                    source.cancel(false);
                }
                reportBusy(concurrentSourceLimit,
                           "Source for " + partitionId + " already running: " +
                           source.toString(), channel);
                return;
            }

            /*
             * Initialize the generation table now. It isn't needed until
             * the migration is complete, but it is better to catch a failure
             * now before the migration starts vs. at the end.
             */
            if (!transferOnly) {
                try {
                    manager.initializeGenerationTable();
                } catch (PartitionMDException ex) {
                    final String msg = "Reject migration request because of  " +
                                       "failure to ensure partition" +
                                       " generation db, reason: " +
                                       ex.getMessage();
                    /* let target retry */
                    reportBusy(concurrentSourceLimit, msg, channel);
                    return;
                }
            }

            source = new MigrationSource(channel, partitionId,
                                         request.targetRNId,
                                         repNode, this, params);
            sourceMap.put(partitionId, source);

            try {
                TransferRequest.writeACKResponse(channel);
            } catch (IOException ioe) {
                sourceMap.remove(partitionId);
                closeChannel(channel);
                throw ioe;
            }

            if (sourceThreadFactory == null) {
                sourceThreadFactory =
                     new KVThreadFactory(" partition migration source", logger);
            }
            logger.log(Level.INFO, "Starting {0}", source);

            /* Start streaming K/Vs to target */
            sourceThreadFactory.newThread(source).start();
        }
    }

    private int getNumRunning() {
        assert Thread.holdsLock(this);

        /* First clean out any non-running sources */
        final Iterator itr = sourceMap.values().iterator();
        while (itr.hasNext()) {
            if (!itr.next().isAlive()) {
                itr.remove();
            }
        }
        return sourceMap.size();
    }

    /*
     * Checks to see if there is a source migration record for the specified
     * partition and target. If so, the record is removed and true is returned.
     */
    private boolean checkForRestart(PartitionId partitionId,
                                    RepNodeId targetRNId) {
        final PartitionMigrations migrations = manager.getMigrations();

        if (migrations == null) {
            return false;
        }

        final SourceRecord record = migrations.getSource(partitionId);

        /*
         * If no record, or the target does not match, then no restart.
         * We can only restart if it is the same node making the new request.
         * We can't trust other nodes in the shard since network partition may
         * have more than one node acting as the master. We could incorrectly
         * restore the partition here (as a result of removing the migration
         * record) when it has moved to the new location. Very bad: [#25313].
         */
        if ((record == null) || !record.getTargetRNId().equals(targetRNId)) {
            return false;
        }
        try {
            logger.log(Level.INFO,
                       "Migration source detected restart of {0}, " +
                       "removing completed record",
                       record);
            manager.removeRecord(record, true);
        } catch (DatabaseException de) {
            logger.log(Level.WARNING, "Exception removing " + record, de);
        }
        return true;
    }

    /**
     * Returns true if the store has been upgraded to the give version or
     * higher.
     *
     * @param ver  given store version
     *
     * @return true if the store has been upgraded to the give version or
     * higher false otherwise or unable to determine the store minimal version.
     */
    private boolean checkForStoreVer(KVVersion ver) {
        final boolean ret = repNode.checkStoreVersion(ver);
        logger.log(Level.FINE,
                   () -> "Store has upgraded to or above " + ver + ": " + ret);
        return ret;
    }

    /*
     * Reports a busy condition to the client. The message is also logged
     * at FINE, and the channel is closed.
     */
    private void reportBusy(int numStreams,
                            String message,
                            DataChannel channel) {
        requestErrors++;
        logger.log(Level.FINE, message);
        try {
            TransferRequest.writeBusyResponse(channel, numStreams, message);
        } catch (IOException ioe) {
            logger.log(Level.WARNING, "Exception sending busy response", ioe);
        }
        closeChannel(channel);
    }

    /*
     * Reports and error condition to the client. The message is also logged
     * at INFO, and the channel is closed.
     */
    private void reportError(Response response,
                             String message,
                             DataChannel channel) {
        assert response.equals(Response.FORMAT_ERROR) ||
               response.equals(Response.UNKNOWN_SERVICE) ||
               response.equals(Response.INVALID);

        requestErrors++;
        logger.log(Level.INFO, message);
        try {
            TransferRequest.writeErrorResponse(channel, response, message);
        } catch (IOException ioe) {
            logger.log(Level.WARNING, "Exception sending error response", ioe);
        }
        closeChannel(channel);
    }

    /* -- Unit test -- */

    void setReadHook(TestHook hook) {
        readHook = hook;
    }

    void setResponseHook(TestHook> hook) {
        responseHook = hook;
    }

    @Override
    public String toString() {
        return "MigrationService[" + enabled + ", " + sourceMap.size() +
               ", " + requestErrors + "]";
    }

    /**
     * Unit test only
     */
    public int getRequestErrors() {
        return requestErrors;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy