All downloads are free. Search and download functionalities use the official Maven repository.

com.instaclustr.cassandra.sidecar.coordination.SidecarBackupOperationCoordinator Maven / Gradle / Ivy

There is a newer version: 2.0.0-alpha8
Show newest version
package com.instaclustr.cassandra.sidecar.coordination;

import static com.instaclustr.cassandra.sidecar.coordination.CoordinationUtils.constructSidecars;
import static java.lang.String.format;
import static java.util.concurrent.CompletableFuture.allOf;
import static java.util.concurrent.CompletableFuture.supplyAsync;

import java.net.InetAddress;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Inject;
import com.instaclustr.cassandra.backup.guice.BackuperFactory;
import com.instaclustr.cassandra.backup.guice.BucketServiceFactory;
import com.instaclustr.cassandra.backup.impl.StorageLocation;
import com.instaclustr.cassandra.backup.impl.backup.BackupOperation;
import com.instaclustr.cassandra.backup.impl.backup.BackupOperationRequest;
import com.instaclustr.cassandra.backup.impl.backup.BackupPhaseResultGatherer;
import com.instaclustr.cassandra.backup.impl.backup.Backuper;
import com.instaclustr.cassandra.backup.impl.backup.coordination.BaseBackupOperationCoordinator;
import com.instaclustr.cassandra.sidecar.rest.SidecarClient;
import com.instaclustr.cassandra.sidecar.rest.SidecarClient.OperationResult;
import com.instaclustr.cassandra.topology.CassandraClusterTopology;
import com.instaclustr.cassandra.topology.CassandraClusterTopology.ClusterTopology;
import com.instaclustr.operations.GlobalOperationProgressTracker;
import com.instaclustr.operations.Operation;
import com.instaclustr.operations.OperationsService;
import com.instaclustr.operations.ResultGatherer;
import com.instaclustr.sidecar.picocli.SidecarSpec;
import com.instaclustr.threading.Executors.ExecutorServiceSupplier;
import jmx.org.apache.cassandra.service.CassandraJMXService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Coordinates a backup across a Cassandra cluster.
 *
 * <p>A request flagged as {@code globalRequest} makes this node the coordinator: it resolves the
 * cluster topology over JMX, fans out per-node (non-global) backup requests to every sidecar, and
 * finally uploads the serialized topology next to the backed-up data. A non-global request is
 * executed locally by delegating to {@link BaseBackupOperationCoordinator}.
 */
public class SidecarBackupOperationCoordinator extends BaseBackupOperationCoordinator {

    // Upper bound on concurrently dispatched per-node backup operations; overridable via system property.
    private static final int MAX_NUMBER_OF_CONCURRENT_OPERATIONS = Integer.parseInt(System.getProperty("instaclustr.sidecar.operations.executor.size", "100"));

    private static final Logger logger = LoggerFactory.getLogger(SidecarBackupOperationCoordinator.class);

    private final OperationsService operationsService;
    private final SidecarSpec sidecarSpec;
    private final ExecutorServiceSupplier executorServiceSupplier;
    private final ObjectMapper objectMapper;

    @Inject
    public SidecarBackupOperationCoordinator(final CassandraJMXService cassandraJMXService,
                                             final Map backuperFactoryMap,
                                             final Map bucketServiceFactoryMap,
                                             final OperationsService operationsService,
                                             final SidecarSpec sidecarSpec,
                                             final ExecutorServiceSupplier executorServiceSupplier,
                                             final ObjectMapper objectMapper) {
        super(cassandraJMXService, backuperFactoryMap, bucketServiceFactoryMap);
        this.operationsService = operationsService;
        this.sidecarSpec = sidecarSpec;
        this.executorServiceSupplier = executorServiceSupplier;
        this.objectMapper = objectMapper;
    }

    /**
     * Executes or coordinates a backup operation.
     *
     * <p>For a non-global request this node simply runs the backup locally (after sanity-checking
     * that at most one other — necessarily global — backup is in flight). For a global request this
     * node becomes the cluster-wide coordinator.
     *
     * @param operation the backup operation to run or coordinate
     * @return gathered results of the (possibly distributed) backup phase
     * @throws OperationCoordinatorException when topology resolution, request preparation or the
     *                                       topology upload fails
     */
    @Override
    public ResultGatherer coordinate(final Operation operation) throws OperationCoordinatorException {

        /*
         * I receive a request
         *  If it is a global request, I will be coordinator
         *  otherwise just execute that request
         */

        // if it is not a global request, there might be at most one global request running
        // and no other backup operations can run, so this means there might be at most one
        // global request running at this node, together with this "normal" backup operation - hence two.
        //
        // this node can be a coordinator of a global request and it can as well receive a "normal" backup request phase
        // so there is a valid case that this node will be running a global request and a backup phase simultaneously
        // hence there will be up to two operations of "backup" type and at most one of them is global

        if (!operation.request.globalRequest) {

            final List backupUUIDs = operationsService.allRunningOfType("backup");

            if (backupUUIDs.size() > 2) {
                throw new IllegalStateException("There are more than two concurrent backup operations running!");
            }

            int normalRequests = 0;

            for (final UUID uuid : backupUUIDs) {
                final Optional operationOptional = operationsService.operation(uuid);

                if (!operationOptional.isPresent()) {
                    throw new IllegalStateException(format("received empty optional for uuid %s", uuid.toString()));
                }

                final Operation op = operationOptional.get();

                if (!(op.request instanceof BackupOperationRequest)) {
                    throw new IllegalStateException(format("Received request is not of type %s", BackupOperationRequest.class));
                }

                BackupOperationRequest request = (BackupOperationRequest) op.request;

                if (!request.globalRequest) {
                    normalRequests += 1;
                }
            }

            if (normalRequests == 2) {
                throw new IllegalStateException("We can not run two normal backup requests simultaneously.");
            }

            return super.coordinate(operation);
        }

        // if it is a global request, we will coordinate the whole backup across a cluster in this operation
        // when this operation finishes, the whole cluster will be backed up.

        // first we have to make some basic checks, e.g. we can be the only global backup operation on this node
        // and no other backup operations (even partial) can run simultaneously

        final List backupUUIDs = operationsService.allRunningOfType("backup");

        if (backupUUIDs.size() != 1) {
            throw new IllegalStateException("There is more than one running backup operation.");
        }

        if (!backupUUIDs.get(0).equals(operation.id)) {
            throw new IllegalStateException("ID of a running operation does not equal to ID of this backup operation!");
        }

        ClusterTopology topology;

        try {
            topology = new CassandraClusterTopology(cassandraJMXService, operation.request.dc).act();
        } catch (final Exception ex) {
            throw new OperationCoordinatorException("Unable to get ClusterTopology!", ex);
        }

        final Map endpoints = topology.endpoints;

        logger.info("Datacenter to be backed up: {}", operation.request.dc == null ? "all of them" : operation.request.dc);

        logger.info("Resolved endpoints: {}", endpoints.toString());

        final Map endpointDCs = topology.endpointDcs;

        logger.info("Resolved endpoints and their dc: {}", endpointDCs.toString());

        final String clusterName = topology.clusterName;

        logger.info("Resolved cluster name: {}", clusterName);

        final Map sidecarClientMap = constructSidecars(clusterName, endpoints, endpointDCs, sidecarSpec, objectMapper);

        logger.info("Executing backup requests against {}", sidecarClientMap);

        // Derives the per-node request from the global one: clone it, clear the global flag and
        // re-target the storage location at the specific node (cluster / dc / hostId).
        final BackupRequestPreparation backupRequestPreparation = (client, globalRequest) -> {

            try {
                if (!client.getHostId().isPresent()) {
                    throw new OperationCoordinatorException(format("There is not any hostId for client %s", client.getHost()));
                }

                final BackupOperationRequest clonedRequest = (BackupOperationRequest) globalRequest.clone();
                final BackupOperation backupOperation = new BackupOperation(clonedRequest);
                backupOperation.request.globalRequest = false;

                backupOperation.request.storageLocation = StorageLocation.update(backupOperation.request.storageLocation,
                                                                                 client.getClusterName(),
                                                                                 client.getDc(),
                                                                                 client.getHostId().get().toString());

                backupOperation.request.storageLocation.globalRequest = false;

                return backupOperation;
            } catch (final Exception ex) {
                throw new OperationCoordinatorException(format("Unable to prepare backup operation for client %s.", client.getHost()), ex);
            }
        };


        BackupPhaseResultGatherer backupPhaseResultGatherer = executeDistributedBackup(operation, sidecarClientMap, backupRequestPreparation);

        // Persist the resolved topology alongside the backup so a later restore can reconstruct
        // the cluster layout. Keyed by cluster name, snapshot tag and schema version.
        try {
            final String clusterTopologyString = ClusterTopology.writeToString(objectMapper, topology);

            // NOTE(review): assumes clients report the same cluster name as the resolved topology
            // and that sidecarClientMap is non-empty here — verify against constructSidecars.
            final String clusterId = sidecarClientMap.entrySet().iterator().next().getValue().getClusterName();

            final Path topologyPath = Paths.get(format("topology/%s-%s-%s-topology.json",
                                                       clusterId,
                                                       operation.request.snapshotTag,
                                                       topology.schemaVersion));

            logger.info("Uploading cluster topology under {}", topologyPath);
            logger.info("\n{}", clusterTopologyString);

            try (Backuper backuper = backuperFactoryMap.get(operation.request.storageLocation.storageProvider).createBackuper(operation.request)) {
                backuper.uploadText(clusterTopologyString, backuper.objectKeyToRemoteReference(topologyPath));
            }
        } catch (final Exception ex) {
            throw new OperationCoordinatorException("Unable to upload topology file", ex);
        }

        return backupPhaseResultGatherer;
    }

    /** Strategy for turning the global request into a per-node backup operation. */
    private interface BackupRequestPreparation {

        Operation prepare(final SidecarClient client, final BackupOperationRequest globalRequest) throws OperationCoordinatorException;
    }

    /**
     * Fans the backup out to every sidecar and waits for all of them to finish.
     *
     * @param globalOperation    the coordinating (global) operation, used for progress tracking
     * @param sidecarClientMap   endpoint-to-sidecar-client mapping to dispatch against
     * @param requestPreparation translates the global request into per-node operations
     * @return gatherer holding any per-node or coordination failures
     * @throws OperationCoordinatorException when a per-node request cannot be prepared
     */
    private BackupPhaseResultGatherer executeDistributedBackup(final Operation globalOperation,
                                                               final Map sidecarClientMap,
                                                               final BackupRequestPreparation requestPreparation) throws OperationCoordinatorException {
        final ExecutorService executorService = executorServiceSupplier.get(MAX_NUMBER_OF_CONCURRENT_OPERATIONS);

        final BackupPhaseResultGatherer resultGatherer = new BackupPhaseResultGatherer();

        try {
            final GlobalOperationProgressTracker progressTracker = new GlobalOperationProgressTracker(globalOperation, sidecarClientMap.entrySet().size());

            // prepare, submit & gather results
            //
            // The per-node operation is captured before submission so the failure handler can
            // reference it: in whenComplete the result argument is null whenever throwable is
            // non-null, so the operation must not be read from the (absent) result.
            final List<CompletableFuture<?>> futures = new ArrayList<>();

            for (final Map.Entry entry : sidecarClientMap.entrySet()) {
                final Operation nodeOperation = requestPreparation.prepare(entry.getValue(), globalOperation.request);

                final BackupOperationCallable callable = new BackupOperationCallable(nodeOperation,
                                                                                    entry.getValue(),
                                                                                    progressTracker);

                futures.add(supplyAsync(callable, executorService).whenComplete((ignored, throwable) -> {
                    if (throwable != null) {
                        logger.warn(format("Backup against %s has failed.", nodeOperation.request.storageLocation), throwable);
                        resultGatherer.gather(nodeOperation, throwable);
                    }
                }));
            }

            allOf(futures.toArray(new CompletableFuture[0])).get();
        } catch (ExecutionException | InterruptedException ex) {
            if (ex instanceof InterruptedException) {
                // restore the interrupt flag so callers up the stack can observe it
                Thread.currentThread().interrupt();
            }
            logger.error("Unable to coordinate backup!", ex);
            resultGatherer.gather(globalOperation, new OperationCoordinatorException("Unable to coordinate backup! " + ex.getMessage(), ex));
        } finally {
            executorService.shutdownNow();
        }

        return resultGatherer;
    }

    /** Submits a single per-node backup request via the node's sidecar REST client. */
    private static class BackupOperationCallable extends OperationCallable {

        public BackupOperationCallable(final Operation operation,
                                       final SidecarClient sidecarClient,
                                       final GlobalOperationProgressTracker progressTracker) {
            super(operation, operation.request.timeout, sidecarClient, progressTracker, "backup");
        }

        @Override
        public OperationResult sendOperation() {
            return super.sidecarClient.backup(super.operation.request);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy