All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.zeebe.broker.clustering.management.ClusterManager Maven / Gradle / Ivy

There is a newer version: 0.21.0-alpha1
Show newest version
/*
 * Zeebe Broker Core
 * Copyright © 2017 camunda services GmbH (info@camunda.com)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package io.zeebe.broker.clustering.management;

import static io.zeebe.broker.clustering.ClusterServiceNames.RAFT_SERVICE_GROUP;
import static io.zeebe.broker.clustering.ClusterServiceNames.raftServiceName;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;

import io.zeebe.broker.Loggers;
import io.zeebe.broker.clustering.handler.Topology;
import io.zeebe.broker.clustering.management.handler.ClusterManagerFragmentHandler;
import io.zeebe.broker.clustering.management.memberList.ClusterMemberListManager;
import io.zeebe.broker.clustering.management.memberList.MemberRaftComposite;
import io.zeebe.broker.clustering.management.message.*;
import io.zeebe.broker.clustering.raft.RaftPersistentFileStorage;
import io.zeebe.broker.clustering.raft.RaftService;
import io.zeebe.broker.logstreams.LogStreamsManager;
import io.zeebe.broker.transport.TransportServiceNames;
import io.zeebe.broker.transport.cfg.SocketBindingCfg;
import io.zeebe.broker.transport.cfg.TransportComponentCfg;
import io.zeebe.logstreams.impl.log.fs.FsLogStorage;
import io.zeebe.logstreams.log.LogStream;
import io.zeebe.protocol.Protocol;
import io.zeebe.raft.Raft;
import io.zeebe.raft.RaftPersistentStorage;
import io.zeebe.raft.state.RaftState;
import io.zeebe.servicecontainer.ServiceContainer;
import io.zeebe.servicecontainer.ServiceName;
import io.zeebe.transport.*;
import io.zeebe.util.sched.Actor;
import io.zeebe.util.sched.future.ActorFuture;
import org.agrona.DirectBuffer;
import org.agrona.concurrent.UnsafeBuffer;
import org.slf4j.Logger;

/**
 * Actor (named {@code "cluster-manager"}) that sets up and tracks the raft groups this broker
 * takes part in.
 *
 * <p>When the actor starts it opens a {@code "cluster-management"} subscription on the server
 * transport and then:
 * <ul>
 * <li>joins the gossip cluster through the configured initial contact points (if any),</li>
 * <li>publishes the node API addresses through the {@link ClusterMemberListManager},</li>
 * <li>installs one raft service per persisted storage file found in the management directory, or,
 * if there is none and no contact point is configured, creates the system topic partition.</li>
 * </ul>
 *
 * <p>It also answers invitation and create-partition requests and sends raft invitations to other
 * members for rafts this broker created or currently leads.
 */
public class ClusterManager extends Actor
{
    private static final Logger LOG = Loggers.CLUSTERING_LOGGER;
    private static final DirectBuffer EMPTY_BUF = new UnsafeBuffer(0, 0);

    private final ClusterManagerContext context;
    private final ServiceContainer serviceContainer;
    private final TransportComponentCfg transportComponentCfg;

    /** Rafts currently registered through {@link #addRaftCallback(ServiceName, Raft)}. */
    private final List<Raft> rafts;

    // reusable message objects for incoming requests and outgoing responses
    private final InvitationRequest invitationRequest;
    private final InvitationResponse invitationResponse;
    private final CreatePartitionRequest createPartitionRequest = new CreatePartitionRequest();
    private final ServerResponse response = new ServerResponse();

    private final LogStreamsManager logStreamsManager;
    private final ClusterMemberListManager clusterMemberListManager;

    /**
     * @param context provides access to the gossip, transports, member list and log streams manager
     * @param serviceContainer container in which the raft services are installed
     * @param transportComponentCfg transport configuration (gossip contact points, management
     *        directory, replication/management API bindings, raft settings)
     */
    public ClusterManager(final ClusterManagerContext context,
                          final ServiceContainer serviceContainer,
                          final TransportComponentCfg transportComponentCfg)
    {
        this.context = context;
        this.serviceContainer = serviceContainer;
        this.transportComponentCfg = transportComponentCfg;
        this.rafts = new CopyOnWriteArrayList<>();
        this.invitationRequest = new InvitationRequest();
        this.logStreamsManager = context.getLogStreamsManager();

        this.invitationResponse = new InvitationResponse();
        this.clusterMemberListManager = new ClusterMemberListManager(context, actor, transportComponentCfg, this::inviteUpdatedMember);
    }

    /**
     * Closes the underlying actor.
     */
    public void close()
    {
        actor.close();
    }

    /**
     * Joins the cluster and restores (or bootstraps) the raft groups of this broker. Called once
     * the management subscription has been opened successfully.
     */
    private void open()
    {
        final List<SocketAddress> contactPoints = Arrays.stream(transportComponentCfg.gossip.initialContactPoints)
            .map(SocketAddress::from)
            .collect(Collectors.toList());
        if (!contactPoints.isEmpty())
        {
            context.getGossip().join(contactPoints);
        }

        clusterMemberListManager.publishNodeAPIAddresses();

        final File storageDirectory = new File(transportComponentCfg.management.directory);
        if (!storageDirectory.exists())
        {
            try
            {
                // creates missing parent directories as well and copes with paths without a parent
                Files.createDirectories(storageDirectory.toPath());
            }
            catch (final IOException e)
            {
                LOG.error("Unable to create directory {}", storageDirectory, e);
            }
        }

        final SocketAddress socketAddress = replicationApiAddress();
        final File[] storageFiles = storageDirectory.listFiles();

        if (storageFiles != null && storageFiles.length > 0)
        {
            // one raft per persisted storage file
            for (final File storageFile : storageFiles)
            {
                final RaftPersistentFileStorage storage = new RaftPersistentFileStorage(storageFile.getAbsolutePath());

                final DirectBuffer topicName = storage.getTopicName();
                final int partitionId = storage.getPartitionId();

                LogStream logStream = logStreamsManager.getLogStream(partitionId);
                if (logStream == null)
                {
                    logStream = logStreamsManager.createLogStream(topicName, partitionId, storage.getLogDirectory());
                }

                storage.setLogStream(logStream);

                createRaft(socketAddress, logStream, storage.getMembers(), storage);
            }
        }
        else if (transportComponentCfg.gossip.initialContactPoints.length == 0)
        {
            // nothing persisted and nobody to contact: this broker starts the system topic itself
            LOG.debug("Broker bootstraps the system topic");
            createPartition(Protocol.SYSTEM_TOPIC_BUF, Protocol.SYSTEM_PARTITION);
        }
    }

    /**
     * Builds the socket address of this broker's replication API from the transport configuration.
     */
    private SocketAddress replicationApiAddress()
    {
        final SocketBindingCfg replicationApi = transportComponentCfg.replicationApi;
        return new SocketAddress(replicationApi.getHost(transportComponentCfg.host), replicationApi.port);
    }

    @Override
    public String getName()
    {
        return "cluster-manager";
    }

    /**
     * Opens the {@code "cluster-management"} subscription on the server transport. On success the
     * subscription is consumed by this actor and {@link #open()} is run; on failure the error is
     * logged and nothing else happens.
     */
    @Override
    protected void onActorStarted()
    {
        final ClusterManagerFragmentHandler fragmentHandler = new ClusterManagerFragmentHandler(this, context.getWorkflowRequestMessageHandler());

        // the future is passed inline so that its element type is inferred from openSubscription
        actor.runOnCompletion(
            context.getServerTransport().openSubscription("cluster-management", fragmentHandler, fragmentHandler),
            (subscription, throwable) ->
            {
                if (throwable == null)
                {
                    actor.consume(subscription, () ->
                    {
                        if (subscription.poll() == 0)
                        {
                            actor.yield();
                        }
                    });

                    open();
                }
                else
                {
                    LOG.error("Failed to open a subscription.", throwable);
                }
            });
    }

    /**
     * Invites the given member to every raft for which this broker is currently the leader.
     * Handed to the {@link ClusterMemberListManager} as a callback in the constructor.
     */
    private void inviteUpdatedMember(SocketAddress updatedMember)
    {
        LOG.debug("Send raft invitations to member {}.", updatedMember);
        for (final Raft raft : rafts)
        {
            if (raft.getState() == RaftState.LEADER)
            {
                // TODO don't invite all members
                inviteMemberToRaft(updatedMember, raft);
            }
        }
    }

    /**
     * Invites the member to the RAFT group.
     *
     * <p>The invitation carries the topic name, partition id and term of the raft together with
     * the raft's own address followed by the addresses of its current members. The request is sent
     * through the management client; its outcome is only logged.
     *
     * @param member address of the member to invite
     * @param raft the raft the member is invited to
     */
    protected void inviteMemberToRaft(SocketAddress member, Raft raft)
    {
        // TODO(menski): implement replication factor
        // TODO: if this should be garbage free, we have to limit
        // the number of concurrent invitations.
        final List<SocketAddress> members = new ArrayList<>();
        members.add(raft.getSocketAddress());
        raft.getMembers()
            .forEach(raftMember -> members.add(raftMember.getRemoteAddress().getAddress()));

        final LogStream logStream = raft.getLogStream();
        final int partitionId = logStream.getPartitionId();

        // a fresh request per invitation; the invitationRequest field is reserved for incoming requests
        final InvitationRequest invitation = new InvitationRequest().topicName(logStream.getTopicName())
                                                                    .partitionId(partitionId)
                                                                    .term(raft.getTerm())
                                                                    .members(members);

        LOG.debug("Send invitation request to {} for partition {} in term {}", member, partitionId, raft.getTerm());

        final RemoteAddress remoteAddress = context.getManagementClient().registerRemoteAddress(member);

        // the future is passed inline so that its element type is inferred from sendRequest
        actor.runOnCompletion(
            context.getManagementClient().getOutput().sendRequest(remoteAddress, invitation),
            (request, throwable) ->
            {
                if (throwable == null)
                {
                    request.close();
                    LOG.debug("Got invitation response from {} for partition id {}.", member, partitionId);
                }
                else
                {
                    LOG.debug("Invitation request failed", throwable);
                }
            });
    }

    /**
     * Persists a new raft storage file for the log stream in the management directory and installs
     * the raft service for it.
     *
     * @param socketAddress address passed on to the raft service
     * @param logStream log stream of the partition; its log storage must be an {@link FsLogStorage}
     * @param members initial members passed on to the raft service
     */
    public void createRaft(final SocketAddress socketAddress, final LogStream logStream, final List<SocketAddress> members)
    {
        final FsLogStorage logStorage = (FsLogStorage) logStream.getLogStorage();
        final String path = logStorage.getConfig().getPath();

        final String directory = transportComponentCfg.management.directory;
        // NOTE(review): directory and file name are concatenated without a separator, so the
        // configured directory presumably ends with one - confirm against the configuration code
        final String storagePath = String.format("%s%s.meta", directory, logStream.getLogName());

        final RaftPersistentFileStorage storage = new RaftPersistentFileStorage(storagePath);
        storage.setLogStream(logStream)
               .setLogDirectory(path)
               .save();

        createRaft(socketAddress, logStream, members, storage);
    }

    /**
     * Installs a raft service for the log stream in the service container. The service is named
     * after the log stream, added to the raft service group and depends on the replication API
     * client transport.
     *
     * @param socketAddress address passed on to the raft service
     * @param logStream log stream of the partition
     * @param members initial members passed on to the raft service
     * @param persistentStorage storage passed on to the raft service
     */
    public void createRaft(final SocketAddress socketAddress, final LogStream logStream, final List<SocketAddress> members,
                           final RaftPersistentStorage persistentStorage)
    {
        final ServiceName<Raft> serviceName = raftServiceName(logStream.getLogName());
        final RaftService raftService = new RaftService(transportComponentCfg.raft, socketAddress, logStream, members, persistentStorage,
            clusterMemberListManager, clusterMemberListManager, serviceName);

        serviceContainer.createService(serviceName, raftService)
                        .group(RAFT_SERVICE_GROUP)
                        .dependency(TransportServiceNames.clientTransport(TransportServiceNames.REPLICATION_API_CLIENT_NAME),
                                    raftService.getClientTransportInjector())
                        .install();
    }

    /**
     * @return {@code true} if the log streams manager already has a log stream for the partition
     */
    protected boolean partitionExists(int partitionId)
    {
        return logStreamsManager.hasLogStream(partitionId);
    }

    /**
     * Creates log stream and sets up raft service to participate in raft group,
     * starting without any other members.
     */
    protected void createPartition(DirectBuffer topicName, int partitionId)
    {
        createPartition(topicName, partitionId, Collections.emptyList());
    }

    /**
     * Creates log stream and sets up raft service to participate in raft group
     *
     * @param topicName name of the topic the partition belongs to
     * @param partitionId id of the partition
     * @param members initial members of the raft group
     */
    protected void createPartition(DirectBuffer topicName, int partitionId, List<SocketAddress> members)
    {
        final LogStream logStream = logStreamsManager.createLogStream(topicName, partitionId);
        createRaft(replicationApiAddress(), logStream, members);
    }

    /**
     * Handles an invitation request: creates the partition named in the request with a copy of the
     * request's member list and replies with an invitation response.
     *
     * @return the result of {@code output.sendResponse}
     */
    public boolean onInvitationRequest(final DirectBuffer buffer, final int offset, final int length, final ServerOutput output,
                                       final RemoteAddress requestAddress, final long requestId)
    {
        invitationRequest.reset();
        invitationRequest.wrap(buffer, offset, length);

        final DirectBuffer topicName = invitationRequest.topicName();
        final int partitionId = invitationRequest.partitionId();

        LOG.debug("Received invitation request from {} for partition {}", requestAddress.getAddress(), partitionId);

        // NOTE(review): unlike onCreatePartitionRequest this does not check partitionExists first - confirm intended
        // the member list is copied because invitationRequest is a reused object
        createPartition(topicName, partitionId, new ArrayList<>(invitationRequest.members()));

        invitationResponse.reset();
        response.reset()
                .remoteAddress(requestAddress)
                .requestId(requestId)
                .writer(invitationResponse);

        return output.sendResponse(response);
    }

    /**
     * Handles a create-partition request: creates the partition unless it exists already and
     * replies with an empty response in both cases.
     *
     * @return the result of {@code output.sendResponse}
     */
    public boolean onCreatePartitionRequest(
            final DirectBuffer buffer, final int offset, final int length,
            final ServerOutput output, final RemoteAddress requestAddress, final long requestId)
    {
        createPartitionRequest.wrap(buffer, offset, length);

        final int partitionId = createPartitionRequest.getPartitionId();

        LOG.debug("Received create partition request for partition {}", partitionId);

        if (partitionExists(partitionId))
        {
            LOG.debug("Partition {} exists already. Ignoring creation request.", partitionId);
        }
        else
        {
            LOG.debug("Creating partition {}", partitionId);
            createPartition(createPartitionRequest.getTopicName(), partitionId);
        }

        response.reset()
            .remoteAddress(requestAddress)
            .requestId(requestId)
            .buffer(EMPTY_BUF);

        return output.sendResponse(response);
    }

    /**
     * @return future of the topology created by the {@link ClusterMemberListManager}
     */
    public ActorFuture<Topology> requestTopology()
    {
        return clusterMemberListManager.createTopology();
    }

    /**
     * This method is called, if a new RAFT is added to the service group.
     *
     * <p>The raft is registered on the actor. If the raft had no members when this method was
     * called, every member of the member list except this broker's own management address is
     * invited to it.
     */
    public void addRaftCallback(final ServiceName<Raft> raftServiceName, final Raft raft)
    {
        // this must be determined before we cross the async boundary to avoid race conditions
        final boolean isRaftCreator = raft.getMemberSize() == 0;
        actor.call(() ->
        {
            LOG.trace("ADD raft {} for partition {} state {}.", raft.getSocketAddress(), raft.getLogStream().getPartitionId(), raft.getState());
            rafts.add(raft);

            if (isRaftCreator)
            {
                final SocketAddress ownManagementAddress = transportComponentCfg.managementApi.toSocketAddress(transportComponentCfg.host);

                final Iterator<MemberRaftComposite> iterator = context.getMemberListService().iterator();
                while (iterator.hasNext())
                {
                    final MemberRaftComposite next = iterator.next();
                    if (!next.getMember().equals(ownManagementAddress))
                    {
                        // TODO don't invite all members to raft
                        inviteMemberToRaft(next.getMember(), raft);
                    }
                }
            }
        });
    }

    /**
     * This method is called, if a RAFT is removed from the service group.
     *
     * <p>Removes the first registered raft whose log stream has the same partition id.
     */
    public void removeRaftCallback(final Raft raft)
    {
        final LogStream logStream = raft.getLogStream();
        final int partitionId = logStream.getPartitionId();

        actor.call(() ->
        {
            for (int i = 0; i < rafts.size(); i++)
            {
                final LogStream stream = rafts.get(i).getLogStream();
                if (partitionId == stream.getPartitionId())
                {
                    rafts.remove(i);
                    break;
                }
            }
        });
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy