Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.arcadedb.server.ha.HAServer Maven / Gradle / Ivy
/*
* Copyright © 2021-present Arcade Data Ltd ([email protected] )
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd ([email protected] )
* SPDX-License-Identifier: Apache-2.0
*/
package com.arcadedb.server.ha;
import com.arcadedb.ContextConfiguration;
import com.arcadedb.GlobalConfiguration;
import com.arcadedb.database.Binary;
import com.arcadedb.exception.ConcurrentModificationException;
import com.arcadedb.exception.ConfigurationException;
import com.arcadedb.exception.TimeoutException;
import com.arcadedb.exception.TransactionException;
import com.arcadedb.log.LogManager;
import com.arcadedb.network.binary.ChannelBinaryClient;
import com.arcadedb.network.binary.ConnectionException;
import com.arcadedb.network.HostUtil;
import com.arcadedb.network.binary.QuorumNotReachedException;
import com.arcadedb.network.binary.ServerIsNotTheLeaderException;
import com.arcadedb.query.sql.executor.InternalResultSet;
import com.arcadedb.query.sql.executor.ResultInternal;
import com.arcadedb.serializer.json.JSONArray;
import com.arcadedb.serializer.json.JSONObject;
import com.arcadedb.server.ArcadeDBServer;
import com.arcadedb.server.ReplicationCallback;
import com.arcadedb.server.ServerException;
import com.arcadedb.server.ServerPlugin;
import com.arcadedb.server.ha.message.ErrorResponse;
import com.arcadedb.server.ha.message.HACommand;
import com.arcadedb.server.ha.message.HAMessageFactory;
import com.arcadedb.server.ha.message.UpdateClusterConfiguration;
import com.arcadedb.server.ha.network.DefaultServerSocketFactory;
import com.arcadedb.utility.Callable;
import com.arcadedb.utility.CodeUtils;
import com.arcadedb.utility.DateUtils;
import com.arcadedb.utility.Pair;
import com.arcadedb.utility.RecordTableFormatter;
import com.arcadedb.utility.TableFormatter;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
import java.util.logging.*;
public class HAServer implements ServerPlugin {
public static final String DEFAULT_PORT = HostUtil.HA_DEFAULT_PORT;
private final HAMessageFactory messageFactory;
private final ArcadeDBServer server;
private final ContextConfiguration configuration;
private final String bucketName;
private final long startedOn;
private volatile int configuredServers = 1;
private final Map replicaConnections = new ConcurrentHashMap<>();
private final AtomicLong lastDistributedOperationNumber = new AtomicLong(-1);
private final AtomicLong lastForwardOperationNumber = new AtomicLong(0);
protected final String replicationPath;
protected ReplicationLogFile replicationLogFile;
private final AtomicReference leaderConnection = new AtomicReference<>();
private LeaderNetworkListener listener;
private final Map messagesWaitingForQuorum = new ConcurrentHashMap<>(
1024);
private final Map forwardMessagesWaitingForResponse = new ConcurrentHashMap<>(
1024);
private long lastConfigurationOutputHash = 0;
private final Object sendingLock = new Object();
private String serverAddress;
private final Set serverAddressList = new HashSet<>();
private String replicasHTTPAddresses;
protected Pair lastElectionVote;
private volatile ELECTION_STATUS electionStatus = ELECTION_STATUS.DONE;
private boolean started;
private final SERVER_ROLE serverRole;
private Thread electionThread;
public enum QUORUM {
NONE, ONE, TWO, THREE, MAJORITY, ALL;
public int quorum(int numberOfServers) {
return switch (this) {
case NONE -> 0;
case ONE -> 1;
case TWO -> 2;
case THREE -> 3;
case MAJORITY -> numberOfServers / 2 + 1;
case ALL -> numberOfServers;
};
}
}
public enum ELECTION_STATUS {
DONE, VOTING_FOR_ME, VOTING_FOR_OTHERS, LEADER_WAITING_FOR_QUORUM
}
public enum SERVER_ROLE {
ANY, REPLICA
}
private static class QuorumMessage {
public final long sentOn = System.currentTimeMillis();
public final CountDownLatch semaphore;
public List payloads;
public QuorumMessage(final CountDownLatch quorumSemaphore) {
this.semaphore = quorumSemaphore;
}
}
private static class ForwardedMessage {
public final CountDownLatch semaphore;
public ErrorResponse error;
public Object result;
public ForwardedMessage() {
this.semaphore = new CountDownLatch(1);
}
}
// private static class RemovedServerInfo {
// String serverName;
// long joinedOn;
// long leftOn;
//
// public RemovedServerInfo(final String remoteServerName, final long joinedOn) {
// this.serverName = remoteServerName;
// this.joinedOn = joinedOn;
// this.leftOn = System.currentTimeMillis();
// }
// }
public HAServer(final ArcadeDBServer server, final ContextConfiguration configuration) {
if (!configuration.getValueAsBoolean(GlobalConfiguration.TX_WAL))
throw new ConfigurationException("Cannot start HA service without using WAL. Please enable the TX_WAL setting.");
this.server = server;
this.messageFactory = new HAMessageFactory(server);
this.configuration = configuration;
this.bucketName = configuration.getValueAsString(GlobalConfiguration.HA_CLUSTER_NAME);
this.startedOn = System.currentTimeMillis();
this.replicationPath = server.getRootPath() + "/replication";
this.serverRole = SERVER_ROLE.valueOf(
configuration.getValueAsString(GlobalConfiguration.HA_SERVER_ROLE).toUpperCase(Locale.ENGLISH));
}
@Override
public void startService() {
if (started)
return;
// WAIT THE HTTP SERVER IS CONNECTED AND ACQUIRES A LISTENING ADDRESS
while (!server.getHttpServer().isConnected())
CodeUtils.sleep(200);
started = true;
final String fileName = replicationPath + "/replication_" + server.getServerName() + ".rlog";
try {
replicationLogFile = new ReplicationLogFile(fileName);
lastDistributedOperationNumber.set(replicationLogFile.getLastMessageNumber());
if (lastDistributedOperationNumber.get() > -1)
LogManager.instance().log(this, Level.FINE, "Found an existent replication log. Starting messages from %d",
lastDistributedOperationNumber.get());
} catch (final IOException e) {
LogManager.instance().log(this, Level.SEVERE, "Error on creating replication file '%s' for remote server '%s'", fileName,
server.getServerName());
stopService();
throw new ReplicationLogException("Error on creating replication file '" + fileName + "'", e);
}
listener = new LeaderNetworkListener(this, new DefaultServerSocketFactory(),
configuration.getValueAsString(GlobalConfiguration.HA_REPLICATION_INCOMING_HOST),
configuration.getValueAsString(GlobalConfiguration.HA_REPLICATION_INCOMING_PORTS));
serverAddress = server.getHostAddress() + ":" + listener.getPort();
final String cfgServerList = configuration.getValueAsString(GlobalConfiguration.HA_SERVER_LIST).trim();
if (!cfgServerList.isEmpty()) {
final String[] serverEntries = cfgServerList.split(",");
configuredServers = serverEntries.length;
LogManager.instance()
.log(this, Level.FINE, "Connecting to servers %s (cluster=%s configuredServers=%d)", cfgServerList, bucketName,
configuredServers);
checkAllOrNoneAreLocalhosts(serverEntries);
serverAddressList.clear();
serverAddressList.addAll(Arrays.asList(serverEntries));
for (final String serverEntry : serverEntries) {
if (!isCurrentServer(serverEntry) && connectToLeader(serverEntry, null)) {
break;
}
}
}
if (leaderConnection.get() == null) {
final int majorityOfVotes = (configuredServers / 2) + 1;
LogManager.instance()
.log(this, Level.INFO, "Unable to find any Leader, start election (cluster=%s configuredServers=%d majorityOfVotes=%d)",
bucketName, configuredServers, majorityOfVotes);
if (serverRole != SERVER_ROLE.REPLICA)
startElection(false);
}
}
protected boolean isCurrentServer(final String serverEntry) {
if (serverAddress.equals(serverEntry))
return true;
final String[] localServerParts = HostUtil.parseHostAddress(serverAddress, DEFAULT_PORT);
try {
final String[] serverParts = HostUtil.parseHostAddress(serverEntry, DEFAULT_PORT);
if (localServerParts[0].equals(serverParts[0]) && localServerParts[1].equals(serverParts[1]))
return true;
final InetAddress localhostAddress = InetAddress.getLocalHost();
if (localhostAddress.getHostAddress().equals(serverParts[0]) && localServerParts[1].equals(serverParts[1]))
return true;
if (localhostAddress.getHostName().equals(serverParts[0]) && localServerParts[1].equals(serverParts[1]))
return true;
} catch (final UnknownHostException e) {
// IGNORE THIS EXCEPTION AND RETURN FALSE
}
return false;
}
@Override
public void stopService() {
started = false;
if (listener != null)
listener.close();
final Replica2LeaderNetworkExecutor lc = leaderConnection.get();
if (lc != null) {
lc.close();
leaderConnection.set(null);
}
if (!replicaConnections.isEmpty()) {
for (final Leader2ReplicaNetworkExecutor r : replicaConnections.values()) {
r.close();
}
replicaConnections.clear();
}
if (replicationLogFile != null)
replicationLogFile.close();
}
public void startElection(final boolean waitForCompletion) {
synchronized (this) {
if (electionThread == null) {
electionThread = new Thread(this::startElection, getServerName() + " election");
electionThread.start();
if (waitForCompletion) {
try {
electionThread.join(60 * 1_000);
} catch (InterruptedException e) {
LogManager.instance().log(this, Level.SEVERE, "Timeout on election process");
// IGNORE IT
}
}
}
}
}
private boolean checkForExistentLeaderConnection(final long electionTurn) {
final Replica2LeaderNetworkExecutor lc = leaderConnection.get();
if (lc != null) {
// I AM A REPLICA, NO LEADER ELECTION IS NEEDED
LogManager.instance()
.log(this, Level.INFO, "Abort election process, a Leader (%s) has been already found (turn=%d)", lc.getRemoteServerName(),
electionTurn);
return true;
}
return false;
}
private void sendNewLeadershipToOtherNodes() {
lastDistributedOperationNumber.set(replicationLogFile.getLastMessageNumber());
setElectionStatus(ELECTION_STATUS.LEADER_WAITING_FOR_QUORUM);
LogManager.instance()
.log(this, Level.INFO, "Contacting all the servers for the new leadership (turn=%d)...", lastElectionVote.getFirst());
for (final String serverAddress : serverAddressList) {
if (isCurrentServer(serverAddress))
// SKIP LOCAL SERVER
continue;
try {
final String[] parts = HostUtil.parseHostAddress(serverAddress, DEFAULT_PORT);
LogManager.instance().log(this, Level.INFO, "- Sending new Leader to server '%s'...", serverAddress);
final ChannelBinaryClient channel = createNetworkConnection(parts[0], Integer.parseInt(parts[1]),
ReplicationProtocol.COMMAND_ELECTION_COMPLETED);
channel.writeLong(lastElectionVote.getFirst());
channel.flush();
} catch (final Exception e) {
LogManager.instance().log(this, Level.INFO, "Error contacting server %s for election", serverAddress);
}
}
}
public Leader2ReplicaNetworkExecutor getReplica(final String replicaName) {
return replicaConnections.get(replicaName);
}
public void disconnectAllReplicas() {
final List replicas = new ArrayList<>(replicaConnections.values());
replicaConnections.clear();
for (Leader2ReplicaNetworkExecutor replica : replicas) {
try {
replica.close();
setReplicaStatus(replica.getRemoteServerName(), false);
} catch (Exception e) {
// IGNORE IT
}
}
configuredServers = 1;
}
public void setReplicaStatus(final String remoteServerName, final boolean online) {
final Leader2ReplicaNetworkExecutor c = replicaConnections.get(remoteServerName);
if (c == null) {
LogManager.instance().log(this, Level.SEVERE, "Replica '%s' was not registered", remoteServerName);
return;
}
c.setStatus(online ? Leader2ReplicaNetworkExecutor.STATUS.ONLINE : Leader2ReplicaNetworkExecutor.STATUS.OFFLINE);
try {
server.lifecycleEvent(online ? ReplicationCallback.TYPE.REPLICA_ONLINE : ReplicationCallback.TYPE.REPLICA_OFFLINE,
remoteServerName);
} catch (final Exception e) {
// IGNORE IT
}
if (electionStatus == ELECTION_STATUS.LEADER_WAITING_FOR_QUORUM) {
if (getOnlineServers() >= configuredServers / 2 + 1)
// ELECTION COMPLETED
setElectionStatus(ELECTION_STATUS.DONE);
}
}
public void receivedResponse(final String remoteServerName, final long messageNumber, final Object payload) {
final long receivedOn = System.currentTimeMillis();
final QuorumMessage msg = messagesWaitingForQuorum.get(messageNumber);
if (msg == null)
// QUORUM ALREADY REACHED OR TIMEOUT
return;
if (payload != null) {
synchronized (msg) {
if (msg.payloads == null)
msg.payloads = new ArrayList<>();
msg.payloads.add(payload);
}
}
msg.semaphore.countDown();
// UPDATE LATENCY
final Leader2ReplicaNetworkExecutor c = replicaConnections.get(remoteServerName);
if (c != null)
c.updateStats(msg.sentOn, receivedOn);
}
public void receivedResponseFromForward(final long messageNumber, final Object result, final ErrorResponse error) {
final ForwardedMessage msg = forwardMessagesWaitingForResponse.get(messageNumber);
if (msg == null)
// QUORUM ALREADY REACHED OR TIMEOUT
return;
LogManager.instance().log(this, Level.FINE, "Forwarded message %d has been executed", messageNumber);
msg.result = result;
msg.error = error;
msg.semaphore.countDown();
}
public ReplicationLogFile getReplicationLogFile() {
return replicationLogFile;
}
public ArcadeDBServer getServer() {
return server;
}
public boolean isLeader() {
return leaderConnection.get() == null;
}
public String getLeaderName() {
return leaderConnection.get() == null ? getServerName() : leaderConnection.get().getRemoteServerName();
}
public Replica2LeaderNetworkExecutor getLeader() {
return leaderConnection.get();
}
public String getServerName() {
return server.getServerName();
}
public String getClusterName() {
return bucketName;
}
public void registerIncomingConnection(final String replicaServerName, final Leader2ReplicaNetworkExecutor connection) {
final Leader2ReplicaNetworkExecutor previousConnection = replicaConnections.put(replicaServerName, connection);
if (previousConnection != null && previousConnection != connection) {
// MERGE CONNECTIONS
connection.mergeFrom(previousConnection);
}
final int totReplicas = replicaConnections.size();
if (1 + totReplicas > configuredServers)
// UPDATE SERVER COUNT
configuredServers = 1 + totReplicas;
sendCommandToReplicasNoLog(new UpdateClusterConfiguration(getServerAddressList(), getReplicaServersHTTPAddressesList()));
printClusterConfiguration();
}
public ELECTION_STATUS getElectionStatus() {
return electionStatus;
}
protected void setElectionStatus(final ELECTION_STATUS status) {
LogManager.instance().log(this, Level.INFO, "Change election status from %s to %s", this.electionStatus, status);
this.electionStatus = status;
}
public HAMessageFactory getMessageFactory() {
return messageFactory;
}
public void setServerAddresses(final String serverAddress) {
if (serverAddress != null && !serverAddress.isEmpty()) {
serverAddressList.clear();
final String[] servers = serverAddress.split(",");
serverAddressList.addAll(Arrays.asList(servers));
this.configuredServers = serverAddressList.size();
} else
this.configuredServers = 1;
}
/**
* Forward a command to the leader server. This occurs with transactions and DDL commands. If the timeout is 0, then the request is asynchronous and the
* response is a Resultset containing `{"operation", "forwarded to the leader"}`
*
* @param command HACommand to forward
* @param timeout Timeout in milliseconds. 0 for asynchronous commands
*
* @return the result from the command if synchronous, otherwise a result set containing `{"operation", "forwarded to the leader"}`
*/
public Object forwardCommandToLeader(final HACommand command, final long timeout) {
LogManager.instance().setContext(getServerName());
final Binary buffer = new Binary();
final String leaderName = getLeaderName();
final long opNumber = this.lastForwardOperationNumber.decrementAndGet();
LogManager.instance().log(this, Level.FINE, "Forwarding request %d (%s) to Leader server '%s'", opNumber, command, leaderName);
// REGISTER THE REQUEST TO WAIT FOR
final ForwardedMessage forwardedMessage = new ForwardedMessage();
if (leaderConnection.get() == null)
throw new ReplicationException("Leader not available");
forwardMessagesWaitingForResponse.put(opNumber, forwardedMessage);
try {
leaderConnection.get().sendCommandToLeader(buffer, command, opNumber);
if (timeout > 0) {
try {
if (forwardedMessage.semaphore.await(timeout, TimeUnit.MILLISECONDS)) {
if (forwardedMessage.error != null) {
// EXCEPTION
if (forwardedMessage.error.exceptionClass.equals(ConcurrentModificationException.class.getName()))
throw new ConcurrentModificationException(forwardedMessage.error.exceptionMessage);
else if (forwardedMessage.error.exceptionClass.equals(TransactionException.class.getName()))
throw new TransactionException(forwardedMessage.error.exceptionMessage);
else if (forwardedMessage.error.exceptionClass.equals(QuorumNotReachedException.class.getName()))
throw new QuorumNotReachedException(forwardedMessage.error.exceptionMessage);
LogManager.instance()
.log(this, Level.WARNING, "Unexpected error received from forwarding a transaction to the Leader");
throw new ReplicationException("Unexpected error received from forwarding a transaction to the Leader");
}
} else {
throw new TimeoutException("Error on forwarding transaction to the Leader server");
}
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
throw new ReplicationException(
"No response received from the Leader for request " + opNumber + " because the thread was interrupted");
}
} else
forwardedMessage.result = new InternalResultSet(new ResultInternal(Map.of("operation", "forwarded to the leader")));
} catch (final IOException | TimeoutException e) {
LogManager.instance().log(this, Level.SEVERE, "Leader server '%s' does not respond, starting election...", leaderName);
startElection(false);
} finally {
forwardMessagesWaitingForResponse.remove(opNumber);
}
return forwardedMessage.result;
}
public void sendCommandToReplicasNoLog(final HACommand command) {
checkCurrentNodeIsTheLeader();
final Binary buffer = new Binary();
// SEND THE REQUEST TO ALL THE REPLICAS
final List replicas = new ArrayList<>(replicaConnections.values());
// ASSURE THE TX ARE WRITTEN IN SEQUENCE INTO THE LOGFILE
synchronized (sendingLock) {
messageFactory.serializeCommand(command, buffer, -1);
LogManager.instance().log(this, Level.FINE, "Sending request (%s) to %s", -1, command, replicas);
for (final Leader2ReplicaNetworkExecutor replicaConnection : replicas) {
// STARTING FROM THE SECOND SERVER, COPY THE BUFFER
try {
replicaConnection.enqueueMessage(-1, buffer.slice(0));
} catch (final ReplicationException e) {
// REMOVE THE REPLICA
LogManager.instance().log(this, Level.SEVERE, "Replica '%s' does not respond, setting it as OFFLINE",
replicaConnection.getRemoteServerName());
setReplicaStatus(replicaConnection.getRemoteServerName(), false);
}
}
}
}
public List sendCommandToReplicasWithQuorum(final HACommand command, final int quorum, final long timeout) {
checkCurrentNodeIsTheLeader();
if (quorum > getOnlineServers()) {
// THE ONLY SMART THING TO DO HERE IS TO THROW AN EXCEPTION. IF THE SERVER WAITS THE ELECTION
// IS COMPLETED, IT COULD CAUSE A DEADLOCK BECAUSE LOCKS COULD BE ACQUIRED IN CASE OF TX
throw new QuorumNotReachedException(
"Quorum " + quorum + " not reached because only " + getOnlineServers() + " server(s) are online");
// waitAndRetryDuringElection(quorum);
// checkCurrentNodeIsTheLeader();
}
final Binary buffer = new Binary();
long opNumber = -1;
QuorumMessage quorumMessage = null;
List responsePayloads = null;
try {
while (true) {
int sent = 0;
// ASSURE THE TX ARE WRITTEN IN SEQUENCE INTO THE LOGFILE
synchronized (sendingLock) {
if (opNumber == -1)
opNumber = this.lastDistributedOperationNumber.incrementAndGet();
buffer.clear();
messageFactory.serializeCommand(command, buffer, opNumber);
if (quorum > 1) {
// REGISTER THE REQUEST TO WAIT FOR THE QUORUM
quorumMessage = new QuorumMessage(new CountDownLatch(quorum - 1));
messagesWaitingForQuorum.put(opNumber, quorumMessage);
}
// SEND THE REQUEST TO ALL THE REPLICAS
final List replicas = new ArrayList<>(replicaConnections.values());
LogManager.instance()
.log(this, Level.FINE, "Sending request %d '%s' to %s (quorum=%d)", opNumber, command, replicas, quorum);
for (final Leader2ReplicaNetworkExecutor replicaConnection : replicas) {
try {
if (replicaConnection.enqueueMessage(opNumber, buffer.slice(0)))
++sent;
else {
if (quorumMessage != null)
quorumMessage.semaphore.countDown();
}
} catch (final ReplicationException e) {
LogManager.instance().log(this, Level.SEVERE, "Error on replicating message %d to replica '%s' (error=%s)", opNumber,
replicaConnection.getRemoteServerName(), e);
// REMOVE THE REPLICA AND EXCLUDE IT FROM THE QUORUM
if (quorumMessage != null)
quorumMessage.semaphore.countDown();
}
}
}
if (sent < quorum - 1) {
checkCurrentNodeIsTheLeader();
LogManager.instance()
.log(this, Level.WARNING, "Quorum " + quorum + " not reached because only " + (sent + 1) + " server(s) are online");
throw new QuorumNotReachedException(
"Quorum " + quorum + " not reached because only " + (sent + 1) + " server(s) are online");
}
if (quorumMessage != null) {
try {
if (!quorumMessage.semaphore.await(timeout, TimeUnit.MILLISECONDS)) {
checkCurrentNodeIsTheLeader();
if (quorum > 1 + getOnlineReplicas())
if (waitAndRetryDuringElection(quorum))
continue;
checkCurrentNodeIsTheLeader();
LogManager.instance()
.log(this, Level.WARNING, "Timeout waiting for quorum (%d) to be reached for request %d", quorum, opNumber);
throw new QuorumNotReachedException(
"Timeout waiting for quorum (" + quorum + ") to be reached for request " + opNumber);
}
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
throw new QuorumNotReachedException(
"Quorum not reached for request " + opNumber + " because the thread was interrupted");
}
}
// WRITE THE MESSAGE INTO THE LOG FIRST
replicationLogFile.appendMessage(new ReplicationMessage(opNumber, buffer));
// OK
break;
}
} finally {
// REQUEST IS OVER, REMOVE FROM THE QUORUM MAP
if (quorumMessage != null) {
responsePayloads = quorumMessage.payloads;
messagesWaitingForQuorum.remove(opNumber);
}
}
return responsePayloads;
}
public int getMessagesInQueue() {
int total = 0;
for (Leader2ReplicaNetworkExecutor r : replicaConnections.values())
total += r.getMessagesInQueue();
return total;
}
public void setReplicasHTTPAddresses(final String replicasHTTPAddresses) {
this.replicasHTTPAddresses = replicasHTTPAddresses;
}
public String getReplicaServersHTTPAddressesList() {
if (isLeader()) {
final StringBuilder list = new StringBuilder();
for (final Leader2ReplicaNetworkExecutor r : replicaConnections.values()) {
final String addr = r.getRemoteServerHTTPAddress();
if (addr == null)
// HTTP SERVER NOT AVAILABLE YET
continue;
if (list.length() > 0)
list.append(",");
list.append(addr);
}
return list.toString();
}
return replicasHTTPAddresses;
}
public void removeServer(final String remoteServerName) {
final Leader2ReplicaNetworkExecutor c = replicaConnections.remove(remoteServerName);
if (c != null) {
//final RemovedServerInfo removedServer = new RemovedServerInfo(remoteServerName, c.getJoinedOn());
LogManager.instance()
.log(this, Level.SEVERE, "Replica '%s' seems not active, removing it from the cluster", remoteServerName);
c.close();
}
configuredServers = 1 + replicaConnections.size();
}
public int getOnlineServers() {
return 1 + getOnlineReplicas();
}
public int getOnlineReplicas() {
int total = 0;
for (final Leader2ReplicaNetworkExecutor c : replicaConnections.values()) {
if (c.getStatus() == Leader2ReplicaNetworkExecutor.STATUS.ONLINE)
total++;
}
return total;
}
public int getConfiguredServers() {
return configuredServers;
}
public String getServerAddressList() {
final StringBuilder list = new StringBuilder();
for (final String s : serverAddressList) {
if (list.length() > 0)
list.append(',');
list.append(s);
}
return list.toString();
}
public void printClusterConfiguration() {
final StringBuilder buffer = new StringBuilder("NEW CLUSTER CONFIGURATION\n");
final TableFormatter table = new TableFormatter((text, args) -> buffer.append(String.format(text, args)));
final List list = new ArrayList<>();
ResultInternal line = new ResultInternal();
list.add(new RecordTableFormatter.TableRecordRow(line));
Date date = new Date(startedOn);
String dateFormatted = startedOn > 0 ?
DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss") :
"";
line.setProperty("SERVER", getServerName());
line.setProperty("HOST:PORT", getServerAddress());
line.setProperty("ROLE", "Leader");
line.setProperty("STATUS", "ONLINE");
line.setProperty("JOINED ON", dateFormatted);
line.setProperty("LEFT ON", "");
line.setProperty("THROUGHPUT", "");
line.setProperty("LATENCY", "");
for (final Leader2ReplicaNetworkExecutor c : replicaConnections.values()) {
line = new ResultInternal();
list.add(new RecordTableFormatter.TableRecordRow(line));
final Leader2ReplicaNetworkExecutor.STATUS status = c.getStatus();
line.setProperty("SERVER", c.getRemoteServerName());
line.setProperty("HOST:PORT", c.getRemoteServerAddress());
line.setProperty("ROLE", "Replica");
line.setProperty("STATUS", status);
date = new Date(c.getJoinedOn());
dateFormatted = c.getJoinedOn() > 0 ?
DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss") :
"";
line.setProperty("JOINED ON", dateFormatted);
date = new Date(c.getLeftOn());
dateFormatted = c.getLeftOn() > 0 ?
DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss") :
"";
line.setProperty("LEFT ON", dateFormatted);
line.setProperty("THROUGHPUT", c.getThroughputStats());
line.setProperty("LATENCY", c.getLatencyStats());
}
table.writeRows(list, -1);
final String output = buffer.toString();
int hash = 7;
for (int i = 0; i < output.length(); i++)
hash = hash * 31 + output.charAt(i);
if (lastConfigurationOutputHash == hash)
// NO CHANGES, AVOID PRINTING CFG
return;
lastConfigurationOutputHash = hash;
LogManager.instance().log(this, Level.INFO, output + "\n");
}
public JSONObject getStats() {
final String dateTimeFormat = GlobalConfiguration.DATE_TIME_FORMAT.getValueAsString();
final JSONObject result = new JSONObject().setDateFormat(dateTimeFormat);
final JSONObject current = new JSONObject().setDateFormat(dateTimeFormat);
current.put("name", getServerName());
current.put("address", getServerAddress());
current.put("role", isLeader() ? "Leader" : "Replica");
current.put("status", "ONLINE");
Date date = new Date(startedOn);
String dateFormatted = DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss");
current.put("joinedOn", dateFormatted);
result.put("current", current);
if (isLeader()) {
final JSONArray replicas = new JSONArray();
for (final Leader2ReplicaNetworkExecutor c : replicaConnections.values()) {
final Leader2ReplicaNetworkExecutor.STATUS status = c.getStatus();
final JSONObject replica = new JSONObject().setDateFormat(dateTimeFormat);
replicas.put(replica);
replica.put("name", c.getRemoteServerName());
replica.put("address", c.getRemoteServerAddress());
replica.put("role", "Replica");
replica.put("status", status);
date = new Date(c.getJoinedOn());
dateFormatted = c.getJoinedOn() > 0 ?
DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss") :
"";
replica.put("joinedOn", dateFormatted);
date = new Date(c.getLeftOn());
dateFormatted = c.getLeftOn() > 0 ?
DateUtils.areSameDay(date, new Date()) ?
DateUtils.format(date, "HH:mm:ss") :
DateUtils.format(date, "yyyy-MM-dd HH:mm:ss") :
"";
replica.put("leftOn", dateFormatted);
replica.put("throughput", c.getThroughputStats());
replica.put("latency", c.getLatencyStats());
}
result.put("replicas", replicas);
}
return result;
}
public String getServerAddress() {
return serverAddress;
}
@Override
public String toString() {
return getServerName();
}
public void resendMessagesToReplica(final long fromMessageNumber, final String replicaName) {
// SEND THE REQUEST TO ALL THE REPLICAS
final Leader2ReplicaNetworkExecutor replica = replicaConnections.get(replicaName);
if (replica == null)
throw new ReplicationException(
"Server '" + getServerName() + "' cannot sync replica '" + replicaName + "' because it is offline");
final long fromPositionInLog = replicationLogFile.findMessagePosition(fromMessageNumber);
final AtomicInteger totalSentMessages = new AtomicInteger();
long min = -1, max = -1;
synchronized (sendingLock) {
for (long pos = fromPositionInLog; pos < replicationLogFile.getSize(); ) {
final Pair entry = replicationLogFile.getMessage(pos);
// STARTING FROM THE SECOND SERVER, COPY THE BUFFER
try {
LogManager.instance()
.log(this, Level.FINE, "Resending message (%s) to replica '%s'...", entry.getFirst(), replica.getRemoteServerName());
if (min == -1)
min = entry.getFirst().messageNumber;
max = entry.getFirst().messageNumber;
replica.sendMessage(entry.getFirst().payload);
totalSentMessages.incrementAndGet();
pos = entry.getSecond();
} catch (final Exception e) {
// REMOVE THE REPLICA
LogManager.instance().log(this, Level.SEVERE, "Replica '%s' does not respond, setting it as OFFLINE (error=%s)",
replica.getRemoteServerName(), e.toString());
setReplicaStatus(replica.getRemoteServerName(), false);
throw new ReplicationException("Cannot resend messages to replica '" + replicaName + "'", e);
}
}
}
LogManager.instance()
.log(this, Level.INFO, "Recovering completed. Sent %d message(s) to replica '%s' (%d-%d)", totalSentMessages.get(),
replicaName, min, max);
}
public boolean connectToLeader(final String serverEntry, final Callable errorCallback) {
final String[] serverParts = HostUtil.parseHostAddress(serverEntry, DEFAULT_PORT);
try {
connectToLeader(serverParts[0], Integer.parseInt(serverParts[1]));
// OK, CONNECTED
return true;
} catch (final ServerIsNotTheLeaderException e) {
final String leaderAddress = e.getLeaderAddress();
LogManager.instance().log(this, Level.INFO, "Remote server %s:%d is not the Leader, connecting to %s", serverParts[0],
Integer.parseInt(serverParts[1]), leaderAddress);
final String[] leader = HostUtil.parseHostAddress(leaderAddress, DEFAULT_PORT);
connectToLeader(leader[0], Integer.parseInt(leader[1]));
// OK, CONNECTED
return true;
} catch (final Exception e) {
LogManager.instance().log(this, Level.INFO, "Error connecting to the remote Leader server %s:%d (error=%s)", serverParts[0],
Integer.parseInt(serverParts[1]), e);
if (errorCallback != null)
errorCallback.call(e);
}
return false;
}
/**
* Connects to a remote server. The connection succeed only if the remote server is the leader.
*/
private void connectToLeader(final String host, final int port) {
final Replica2LeaderNetworkExecutor lc = leaderConnection.get();
if (lc != null) {
// CLOSE ANY LEADER CONNECTION STILL OPEN
lc.kill();
leaderConnection.set(null);
}
// KILL ANY ACTIVE REPLICA CONNECTION
for (final Leader2ReplicaNetworkExecutor r : replicaConnections.values())
r.close();
replicaConnections.clear();
leaderConnection.set(new Replica2LeaderNetworkExecutor(this, host, port));
leaderConnection.get().startup();
// START SEPARATE THREAD TO EXECUTE LEADER'S REQUESTS
leaderConnection.get().start();
}
protected ChannelBinaryClient createNetworkConnection(final String host, final int port, final short commandId)
throws IOException {
try {
server.lifecycleEvent(ReplicationCallback.TYPE.NETWORK_CONNECTION, host + ":" + port);
} catch (final Exception e) {
throw new ConnectionException(host + ":" + port, e);
}
final ChannelBinaryClient channel = new ChannelBinaryClient(host, port, this.configuration);
final String clusterName = this.configuration.getValueAsString(GlobalConfiguration.HA_CLUSTER_NAME);
// SEND SERVER INFO
channel.writeLong(ReplicationProtocol.MAGIC_NUMBER);
channel.writeShort(ReplicationProtocol.PROTOCOL_VERSION);
channel.writeString(clusterName);
channel.writeString(getServerName());
channel.writeString(getServerAddress());
channel.writeString(server.getHttpServer().getListeningAddress());
channel.writeShort(commandId);
return channel;
}
private boolean waitAndRetryDuringElection(final int quorum) {
if (electionStatus == ELECTION_STATUS.DONE)
// BLOCK HERE THE REQUEST, THE QUORUM CANNOT BE REACHED AT PRIORI
throw new QuorumNotReachedException(
"Quorum " + quorum + " not reached because only " + getOnlineServers() + " server(s) are online");
LogManager.instance()
.log(this, Level.INFO, "Waiting during election (quorum=%d onlineReplicas=%d)", quorum, getOnlineReplicas());
for (int retry = 0; retry < 10 && electionStatus != ELECTION_STATUS.DONE; ++retry) {
try {
Thread.sleep(500);
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
LogManager.instance()
.log(this, Level.INFO, "Waiting is over (electionStatus=%s quorum=%d onlineReplicas=%d)", electionStatus, quorum,
getOnlineReplicas());
return electionStatus == ELECTION_STATUS.DONE;
}
private void checkCurrentNodeIsTheLeader() {
if (!isLeader())
throw new ServerIsNotTheLeaderException("Cannot execute command", getLeader().getRemoteServerName());
}
private static void checkAllOrNoneAreLocalhosts(String[] serverEntries) {
int localHostServers = 0;
for (int i = 0; i < serverEntries.length; i++) {
final String serverEntry = serverEntries[i];
if (serverEntry.startsWith("localhost") || serverEntry.startsWith("127.0.0.1"))
++localHostServers;
}
if (localHostServers > 0 && localHostServers < serverEntries.length)
throw new ServerException(
"Found a localhost (127.0.0.1) in the server list among non-localhost servers. Please fix the server list configuration.");
}
private void startElection() {
try {
if (electionStatus == ELECTION_STATUS.VOTING_FOR_ME)
// ELECTION ALREADY RUNNING
return;
setElectionStatus(ELECTION_STATUS.VOTING_FOR_ME);
final long lastReplicationMessage = replicationLogFile.getLastMessageNumber();
long electionTurn = lastElectionVote == null ? 1 : lastElectionVote.getFirst() + 1;
final Replica2LeaderNetworkExecutor lc = leaderConnection.get();
if (lc != null) {
// CLOSE ANY LEADER CONNECTION STILL OPEN
lc.close();
leaderConnection.set(null);
}
// TODO: IF A LEADER START THE ELECTION, SHOULD IT CLOSE THE EXISTENT CONNECTIONS TO THE REPLICAS?
for (int retry = 0; !checkForExistentLeaderConnection(electionTurn) && started; ++retry) {
final int majorityOfVotes = (configuredServers / 2) + 1;
int totalVotes = 1;
lastElectionVote = new Pair<>(electionTurn, getServerName());
LogManager.instance().log(this, Level.INFO,
"Starting election of local server asking for votes from %s (turn=%d retry=%d lastReplicationMessage=%d configuredServers=%d majorityOfVotes=%d)",
serverAddressList, electionTurn, retry, lastReplicationMessage, configuredServers, majorityOfVotes);
final HashMap otherLeaders = new HashMap<>();
boolean electionAborted = false;
final HashSet serverAddressListCopy = new HashSet<>(serverAddressList);
for (final String serverAddressCopy : serverAddressListCopy) {
if (isCurrentServer(serverAddressCopy))
// SKIP LOCAL SERVER
continue;
try {
final String[] parts = HostUtil.parseHostAddress(serverAddressCopy, DEFAULT_PORT);
final ChannelBinaryClient channel = createNetworkConnection(parts[0], Integer.parseInt(parts[1]),
ReplicationProtocol.COMMAND_VOTE_FOR_ME);
channel.writeLong(electionTurn);
channel.writeLong(lastReplicationMessage);
channel.flush();
final byte vote = channel.readByte();
if (vote == 0) {
// RECEIVED VOTE
++totalVotes;
LogManager.instance()
.log(this, Level.INFO, "Received the vote from server %s (turn=%d totalVotes=%d majority=%d)", serverAddressCopy,
electionTurn, totalVotes, majorityOfVotes);
} else {
final String otherLeaderName = channel.readString();
if (!otherLeaderName.isEmpty()) {
final Integer counter = otherLeaders.get(otherLeaderName);
otherLeaders.put(otherLeaderName, counter == null ? 1 : counter + 1);
}
if (vote == 1) {
// NO VOTE, IT ALREADY VOTED FOR SOMEBODY ELSE
LogManager.instance().log(this, Level.INFO,
"Did not receive the vote from server %s (turn=%d totalVotes=%d majority=%d itsLeader=%s)", serverAddressCopy,
electionTurn, totalVotes, majorityOfVotes, otherLeaderName);
} else if (vote == 2) {
// NO VOTE, THE OTHER NODE HAS A HIGHER LSN, IT WILL START THE ELECTION
electionAborted = true;
LogManager.instance().log(this, Level.INFO,
"Aborting election because server %s has a higher LSN (turn=%d lastReplicationMessage=%d totalVotes=%d majority=%d)",
serverAddressCopy, electionTurn, lastReplicationMessage, totalVotes, majorityOfVotes);
}
}
channel.close();
} catch (final Exception e) {
LogManager.instance()
.log(this, Level.INFO, "Error contacting server %s for election: %s", serverAddressCopy, e.getMessage());
}
}
if (checkForExistentLeaderConnection(electionTurn))
break;
if (!electionAborted && totalVotes >= majorityOfVotes) {
LogManager.instance()
.log(this, Level.INFO, "Current server elected as new $ANSI{green Leader} (turn=%d totalVotes=%d majority=%d)",
electionTurn, totalVotes, majorityOfVotes);
sendNewLeadershipToOtherNodes();
break;
}
if (!otherLeaders.isEmpty()) {
// TRY TO CONNECT TO THE EXISTENT LEADER
LogManager.instance()
.log(this, Level.INFO, "Other leaders found %s (turn=%d totalVotes=%d majority=%d)", otherLeaders, electionTurn,
totalVotes, majorityOfVotes);
for (final Map.Entry entry : otherLeaders.entrySet()) {
if (entry.getValue() >= majorityOfVotes) {
LogManager.instance()
.log(this, Level.INFO, "Trying to connect to the existing leader '%s' (turn=%d totalVotes=%d majority=%d)",
entry.getKey(), electionTurn, entry.getValue(), majorityOfVotes);
if (!isCurrentServer(entry.getKey()) && connectToLeader(entry.getKey(), null))
break;
}
}
}
if (checkForExistentLeaderConnection(electionTurn))
break;
try {
long timeout = 1000 + new Random().nextInt(1000);
if (electionAborted)
timeout *= 3;
LogManager.instance()
.log(this, Level.INFO, "Not able to be elected as Leader, waiting %dms and retry (turn=%d totalVotes=%d majority=%d)",
timeout, electionTurn, totalVotes, majorityOfVotes);
Thread.sleep(timeout);
} catch (final InterruptedException e) {
// INTERRUPTED
Thread.currentThread().interrupt();
break;
}
if (checkForExistentLeaderConnection(electionTurn))
break;
++electionTurn;
}
} finally {
synchronized (this) {
electionThread = null;
}
}
}
}