All Downloads are FREE. Search and download functionalities are using the official Maven repository.

oracle.kv.impl.admin.Snapshot Maven / Gradle / Ivy

Go to download

NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.

The newest version!
/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */

package oracle.kv.impl.admin;

import java.io.PrintStream;
import java.io.Serializable;
import java.rmi.RemoteException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import oracle.kv.impl.admin.param.AdminParams;
import oracle.kv.impl.admin.param.Parameters;
import oracle.kv.impl.topo.AdminId;
import oracle.kv.impl.topo.DatacenterId;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.ResourceId;
import oracle.kv.impl.topo.StorageNode;
import oracle.kv.impl.topo.StorageNodeId;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.util.SerialVersion;
import oracle.kv.impl.util.server.LoggerUtils;

/**
 * A class to encapsulate store-wide snapshot creation and management.
 * Model for snapshots:
 * -- Snapshots are run for all nodes, masters and replicas
 * -- Snapshots will fail for disabled or not-running services
 *
 * Off-node backups of snapshots is done via direct administrative copies of
 * files associated with a particular snapshot.  In general the snapshot from
 * the nodes acting as master at the time of the snapshot should be used.
 *
 * Operations supported include:
 * createSnapshot(name)
 * listSnapshots()
 * removeSnapshot(name)
 * removeAllSnapshots()
 *
 * The methods, with the exception of createSnapshot() are void.  Results are
 * determined by calling methods after the operation returns.  The general
 * pattern is:
 *   Snapshot snapshot = new Snapshot(...);
 *   snapshot.operation();
 *   if (!snapshot.succeeded()) {
 *     ...problems...
 *     List<SnapResult> failures = snapshot.getFailures();
 *     ... process failures ...
 *   }
 *
 * The SnapResult object contains the ResourceId of the failed node along with
 * any exception or additional information that may be useful.  It is also
 * possible to get a list of successes.
 *
 * The name passed into createSnapshot() will be appended to the current date
 * and time to make the actual snapshot name used, which is the one returned
 * by listSnapshots() and used for removeSnapshot().
 *
 */

public class Snapshot {

    private final Topology topo;
    private final CommandServiceAPI cs;
    private final boolean opsOnServer;
    private final SnapshotUtils utils;
    private SnapResultSummary snapResultSummary;
    private SnapshotOperation op;

    public enum SnapshotOperation {
        CREATE, REMOVE, REMOVEALL
    }

    public Snapshot(CommandServiceAPI cs,
                    boolean verboseOutput,
                    PrintStream output)
        throws RemoteException {
        this.cs = cs;
        topo = cs.getTopology();
        utils = new SnapshotUtils(verboseOutput, output);
        opsOnServer =
            (cs.getSerialVersion() >= SerialVersion.SNAPSHOT_ON_SERVER_VERSION);
    }

    /**
     * Results handling.
     */

    /**
     * Indicates whether the operation succeeded on all nodes in the Topology.
     */
    public boolean succeeded() {
        return snapResultSummary.getSucceeded();
    }

    /**
     * Return true if at least one node in each replication group (including
     * the admin) returned success.  If there was a failure the caller must
     * use other methods to report the problems.
     */
    public boolean getQuorumSucceeded() {
        return snapResultSummary.getQuorumSucceeded();
    }

    /**
     * Return the list of SnapResult objects for which the operation succeeded
     */
    public List getSuccesses() {
        return snapResultSummary.getSuccesses();
    }

    /**
     * Return the list of SnapResult objects for which the operation failed
     */
    public List getFailures() {
        return snapResultSummary.getFailures();
    }

    /**
     * Return the list of SnapResult of snapshot configuration operations
     * which are succeeded.
     */
    public List getConfigSuccesses() {
        return snapResultSummary.getConfigSuccesses();
    }

    /**
     * Return the list of SnapResult of snapshot configuration operations
     * which are failed.
     */
    public List getConfigFailures() {
        return snapResultSummary.getConfigFailures();
    }

    public SnapshotOperation getOperation() {
        return op;
    }

    /**
     * Create a snapshot on all Storage Nodes in the topology.  The actual
     * snapshot name is the current time/date concatenated with the name
     * parameter.
     *
     * @param name the suffix to use for the snapshot name
     *
     * @return the generated snapshot name
     */
    public String createSnapshot(String name)
        throws Exception {
        return createSnapshot(name, null);
    }

    /**
     * Create a snapshot on all Storage Nodes in the specified zone.
     * The actual snapshot name is the current time/date concatenated with the
     * name parameter.
     * Operation applies to all zones if dcId is null.
     *
     * @param name the suffix to use for the snapshot name
     * @param dcId the datacenterId
     *
     * @return the generated snapshot name
     */
    public String createSnapshot(String name, DatacenterId dcId)
        throws Exception {

        String snapName = makeSnapshotName(name);
        utils.verbose("Start create snapshot " + snapName);
        resetOperation(SnapshotOperation.CREATE);
        snapResultSummary = opsOnServer ?
                            cs.executeSnapshotOperation(op, snapName, dcId) :
                            executeSnapshotTasks(snapName, dcId);
        utils.verbose("Complete create snapshot " + snapName);
        return snapName;
    }

    /**
     * Remove a snapshot from all Storage Nodes.  Each Storage Node will remove
     * it from any managed services.
     *
     * @param name the full name of the snapshot, including date time that was
     * generated by createSnapshot
     *
     */
    public void removeSnapshot(String name)
        throws Exception {
        removeSnapshot(name, null);
    }

    /**
     * Remove a snapshot from all Storage Nodes belonging to the given zone.
     * Each Storage Node will remove it from any managed services.
     * Operation applies to all zones if dcId is null.
     *
     * @param name the full name of the snapshot, including date time that was
     * generated by createSnapshot
     * @param dcId the datacenterId
     */
    public void removeSnapshot(String name, DatacenterId dcId)
        throws Exception {

        utils.verbose("Start remove snapshot " + name);
        resetOperation(SnapshotOperation.REMOVE);
        snapResultSummary = opsOnServer ?
                            cs.executeSnapshotOperation(op, name, dcId) :
                            executeSnapshotTasks(name, dcId);
        utils.verbose("Complete remove snapshot " + name);
    }

    /**
     * Remove all known snapshots unconditionally.
     */
    public void removeAllSnapshots()
        throws Exception {

        removeAllSnapshots(null);
    }

    /**
     * Remove all known snapshots in a given zone. If null is supplied for dcId
     * all the known spanshots are removed unconditionally.
     */
    public void removeAllSnapshots(DatacenterId dcId)
        throws Exception {

        utils.verbose("Start remove all snapshots");
        resetOperation(SnapshotOperation.REMOVEALL);
        snapResultSummary = opsOnServer ?
                            cs.executeSnapshotOperation(op, null, dcId) :
                            executeSnapshotTasks(null, dcId);
        utils.verbose("Complete remove all snapshots");
    }

    /**
     * Return an array of names of snapshots.  For now this will choose an
     * arbitrary Storage Node and ask it for its list under the assumption
     * that each SN should have the same snapshots.  Try all storage nodes
     * if any are not available.
     */
    public String[] listSnapshots()
        throws RemoteException {

        return cs.listSnapshots(null /* StorageNodeId */);
    }

    /**
     * A variant of listSnapshots that takes a specific StorageNodeId as the
     * target node.
     */
    public String[] listSnapshots(StorageNodeId snid)
        throws RemoteException {

        StorageNode sn = topo.get(snid);
        if (sn == null) {
            throw new IllegalArgumentException
                ("No Storage Node found with id " + snid);
        }
        utils.verbose("Listing snapshots from Storage Node: " + snid);
        return cs.listSnapshots(snid);
    }

    private void resetOperation(SnapshotOperation newOp) {
        op = newOp;
        utils.resetOperation(newOp);
        snapResultSummary = null;
    }

    /**
     * Common code to perform the same task on all services and wait for
     * results.  Because storage nodes can be down put the possibility of
     * connection-related exceptions inside the task execution vs creation.
     */
    private SnapResultSummary executeSnapshotTasks(String name,
                                                   DatacenterId dcId)
        throws Exception {

        List repNodes = topo.getSortedRepNodes();
        ExecutorService threadPool = null;
        try {
            threadPool = utils.createThreadPool();
            List> taskResults = utils.createFutureList();

            /**
             * Do admins first
             */
            Parameters p = cs.getParameters();
            for (AdminId id : p.getAdminIds()) {
                AdminParams ap = p.get(id);
                StorageNode sn = topo.get(ap.getStorageNodeId());
                if (dcId != null && !sn.getDatacenterId().equals(dcId)) {
                    continue;
                }
                utils.verbose("Creating task (" + op + ") for " + id);
                taskResults.add
                    (threadPool.submit
                        (new SnapshotTask(name, sn.getResourceId(), id)));
            }

            for (RepNode rn : repNodes) {
                StorageNode sn = topo.get(rn.getStorageNodeId());
                if (dcId != null && !sn.getDatacenterId().equals(dcId)) {
                    continue;
                }
                utils.verbose("Creating task (" + op + ") for " + rn);
                taskResults.add
                    (threadPool.submit
                        (new SnapshotTask
                            (name, sn.getResourceId(), rn.getResourceId())));
            }
            return utils.waitForResults(topo, taskResults);
        } finally {
            if (threadPool != null) {
                threadPool.shutdown();
            }
        }
    }

    private String makeSnapshotName(String name) {
        Date now = new Date();
        SimpleDateFormat format = new SimpleDateFormat("yyMMdd-HHmmss");
        return format.format(now) + "-" + name;
    }

    /**
     * The SnapResult object contains the ResourceId of the failed node along
     * with any exception or additional information that may be useful.
     */
    public static class SnapResult implements Serializable {
        private static final long serialVersionUID = 1L;

        private final boolean result;
        private final ResourceId service;
        private final Exception ex;
        private final String message;
        private final SnapshotOperation op;

        protected SnapResult(final SnapshotOperation op,
                             final boolean result,
                             final ResourceId service,
                             final Exception ex,
                             final String message) {
            this.op = op;
            this.result = result;
            this.service = service;
            this.ex = ex;

            /**
             * TODO: should message include stack trace in the exception case?
             */
            if (ex != null) {
                this.message = message + ", Exception: " + ex;
            } else {
                this.message = message;
            }
        }

        public boolean getSucceeded() {
            return result;
        }

        public ResourceId getService() {
            return service;
        }

        public String getMessage() {
            return message;
        }

        public Exception getException() {
            return ex;
        }

        public String getExceptionStackTrace() {
            if (ex != null) {
                return LoggerUtils.getStackTrace(ex);
            }
            return "";
        }

        @Override
        public String toString() {
            return "Operation " + op + " on " + service +
                (result ? " succeeded" : " failed") + ": " + message;
        }
    }

    /**
     * Encapsulate the results of a distributed snapshot operation.
     */
    public static class SnapResultSummary implements Serializable {
        private static final long serialVersionUID = 1L;

        /*
         * Indicate success of snapshot data files.
         */
        private final List success;

        /*
         * Indicate failure of snapshot data files.
         */
        private final List failure;

        private final boolean allSucceeded;
        private final boolean quorumSucceeded;
        private final List configSuccess;
        private final List configFailure;

        public SnapResultSummary(final List success,
                                 final List failure,
                                 final boolean allSucceeded,
                                 final boolean quorumSucceeded,
                                 final List configSuccess,
                                 final List configFailure) {
            this.success = success;
            this.failure = failure;
            this.allSucceeded = allSucceeded;
            this.quorumSucceeded = quorumSucceeded;
            this.configSuccess = configSuccess;
            this.configFailure = configFailure;
        }

        public List getSuccesses() {
            return success;
        }

        public List getFailures() {
            return failure;
        }

        public boolean getSucceeded() {
            return allSucceeded;
        }

        public boolean getQuorumSucceeded() {
            return quorumSucceeded;
        }

        public List getConfigSuccesses() {
            if (configSuccess == null) {
                return Collections.emptyList();
            }
            return configSuccess;
        }

        public List getConfigFailures() {
            if (configFailure == null) {
                return Collections.emptyList();
            }
            return configFailure;
        }

    }

    /**
     * Encapsulate a task to snapshot a single managed service, either RepNode
     * or Admin.
     */
    private class SnapshotTask implements Callable {
        String name;
        StorageNodeId snid;
        ResourceId rid;

        public SnapshotTask(String name,
                            StorageNodeId snid,
                            ResourceId rid) {
            this.name = name;
            this.snid = snid;
            this.rid = rid;
        }

        @Override
        public SnapResult call() {
            try {
                return cs.executeSnapshotOperation(op, snid, rid, name);
            } catch (Exception e) {
                return new SnapResult(op, false, rid, e, "Failed");
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy