org.rhq.enterprise.agent.PrimaryServerSwitchoverThread Maven / Gradle / Ivy
The newest version!
/*
* RHQ Management Platform
* Copyright (C) 2005-2008 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.rhq.enterprise.agent;
import mazz.i18n.Logger;
import org.rhq.core.domain.cloud.composite.FailoverListComposite;
import org.rhq.core.domain.cloud.composite.FailoverListComposite.ServerEntry;
import org.rhq.enterprise.agent.i18n.AgentI18NFactory;
import org.rhq.enterprise.agent.i18n.AgentI18NResourceKeys;
import org.rhq.enterprise.communications.command.CommandResponse;
import org.rhq.enterprise.communications.command.client.ClientCommandSender;
import org.rhq.enterprise.communications.command.client.RemoteCommunicator;
import org.rhq.enterprise.communications.command.impl.identify.IdentifyCommand;
import org.rhq.enterprise.communications.util.NotProcessedException;
import org.rhq.enterprise.communications.util.SecurityUtil;
/**
* This thread's job is to periodically try to get the agent to point back to
* its primary server, if it isn't pointing to that server already.
*
* The "primary server" is the server found at the top of the agent's failover list.
* If the agent is already talking to this server, or if the agent does not yet have
* a failover list, nothing needs to be done.
*
* If the agent is talking to another server, this thread will probe the primary server
* and if it can, this thread will switch the agent's sender back to point to the primary.
*
* If the agent is not in sending mode, this thread will not do anything until it is. The agent
* will decide what server it should talk to in that case. This thread is only here
* to prevent an agent talking to a non-primary server for a long time when the primary
* server is available.
*
* @author John Mazzitelli
*/
public class PrimaryServerSwitchoverThread extends Thread {
    // NOTE(review): the logger is created for AgentMain.class rather than this class - confirm this is intentional.
    private static final Logger LOG = AgentI18NFactory.getLogger(AgentMain.class);

    /**
     * The smallest sleep interval, in milliseconds, that {@link #setInterval(long)} will honor.
     */
    private static final long MINIMUM_INTERVAL = 1000L;

    private final AgentMain agent;

    /**
     * The amount of time in milliseconds that this thread will sleep in between polling the server.
     * Volatile because {@link #setInterval(long)} may be called from a thread other than this one.
     */
    private volatile long interval = 1000L * 60 * 60; // 1 hour

    /**
     * Will be <code>true</code> when this thread is told to stop polling. Note that this does not
     * necessarily mean the thread is stopped, it just means this thread was told to stop.
     */
    private volatile boolean toldToStop = false;

    /**
     * Creates the (daemon) thread. The thread does nothing until it is started.
     *
     * @param agent the agent whose server connection is checked and, if needed, switched
     */
    public PrimaryServerSwitchoverThread(AgentMain agent) {
        super("RHQ Primary Server Switchover Thread");
        setDaemon(true);
        this.agent = agent;
    }

    /**
     * Repeatedly checks whether the agent is talking to its primary server and, if not, tries to
     * switch it back. Between checks the thread sleeps for the configured interval; the sleep can be
     * cut short with {@link #checkNow()} and the loop is ended with {@link #stopChecking()}.
     */
    @Override
    public void run() {
        LOG.info(AgentI18NResourceKeys.PRIMARY_SERVER_SWITCHOVER_THREAD_STARTED);

        while (!isInterrupted() && !toldToStop) {
            try {
                switchToPrimaryIfNeeded();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    break; // exiting
                }
                LOG.warn(e, AgentI18NResourceKeys.PRIMARY_SERVER_SWITCHOVER_EXCEPTION, e);
            }

            // Sleep until it is time to check again. This is deliberately outside of the try/catch above:
            // a failed check must still wait out the interval, otherwise a persistent failure would make
            // this loop spin, retrying and logging as fast as it can.
            try {
                synchronized (this) {
                    if (toldToStop) {
                        break; // a failed check may have consumed the interrupt; do not sleep if we were told to stop
                    }
                    wait(interval);
                }
            } catch (InterruptedException ie) {
                break; // exiting
            }
        }

        LOG.info(AgentI18NResourceKeys.PRIMARY_SERVER_SWITCHOVER_THREAD_STOPPED);
    }

    /**
     * Sets the time (in milliseconds) that this thread sleeps between checks. Values less than
     * 1000 are raised to 1000; this keeps a zero interval from making the thread wait forever and
     * a negative interval from making the wait fail on every pass.
     *
     * @param interval sleep time, in milliseconds (must not be less than 1000)
     */
    public void setInterval(long interval) {
        this.interval = Math.max(interval, MINIMUM_INTERVAL);
    }

    /**
     * Call this method when you want to stop this thread, which effectively stops it from
     * checking that the agent is pointing to its primary server.
     */
    public void stopChecking() {
        toldToStop = true;
        interrupt();
        // no need to notify, wait will exit
    }

    /**
     * Forces this thread to check now and switch to the primary if needed. If the thread
     * is already checking, this method does nothing. Effectively, this method wakes up
     * this thread if its sleeping during the {@link #setInterval(long) sleep interval}.
     */
    public void checkNow() {
        synchronized (this) {
            notifyAll();
        }
    }

    /**
     * Performs a single check. If the agent is not sending, or there is no failover list (or it is
     * empty), nothing is done. Otherwise the server the agent is configured to talk to is compared
     * with the primary server, which is the first entry of the failover list. If they differ and the
     * primary answers a ping, the agent is asked to fail over, starting from the top of the list.
     *
     * @throws Exception if any of the agent calls made during the check fails; declared broadly because
     *                   the signatures of those calls are not visible from this class
     */
    private void switchToPrimaryIfNeeded() throws Exception {
        ClientCommandSender sender = this.agent.getClientCommandSender();
        if (!sender.isSending()) {
            return; // the agent decides which server to talk to when it is not sending
        }

        FailoverListComposite failoverList = this.agent.downloadServerFailoverList(); // ask the server for a new one

        // if there is no failover list or it doesn't have any servers, skip our poll and wait some more
        if (failoverList == null || failoverList.size() <= 0) {
            return;
        }

        AgentConfiguration config = this.agent.getConfiguration();
        String transport = config.getServerTransport();
        String transportParams = config.getServerTransportParams();
        String currentServerAddress = config.getServerBindAddress();
        int currentServerPort = config.getServerBindPort();

        ServerEntry primary = failoverList.get(0); // get the top of the list, aka primary server
        String primaryAddress = primary.address;
        int primaryPort = SecurityUtil.isTransportSecure(transport) ? primary.securePort : primary.port;

        boolean talkingToPrimary = primaryAddress.equals(currentServerAddress) && primaryPort == currentServerPort;
        if (talkingToPrimary) {
            return;
        }

        LOG.info(AgentI18NResourceKeys.NOT_TALKING_TO_PRIMARY_SERVER, primaryAddress, primaryPort,
            currentServerAddress, currentServerPort);

        // create our own comm so we ping in an isolated client - don't reuse the sender's comm for this
        // NOTE(review): this communicator is never explicitly released here - confirm whether it needs to be.
        RemoteCommunicator comm = this.agent.createServerRemoteCommunicator(transport, primaryAddress, primaryPort,
            transportParams);

        if (ping(comm)) {
            LOG.info(AgentI18NResourceKeys.PRIMARY_SERVER_UP, primaryAddress, primaryPort);
            failoverList.resetIndex(); // so the failover method call starts at the top
            this.agent.failoverToNewServer(sender.getRemoteCommunicator()); // note that we make sure we pass in the sender's comm
        } else {
            LOG.info(AgentI18NResourceKeys.PRIMARY_SERVER_STILL_DOWN, primaryAddress, primaryPort);
        }
    }

    /**
     * Given the remote communicator (which isn't the one in the agent's command sender), this sends a ping
     * request to the remote endpoint and returns <code>true</code> if the remote endpoint is up.
     * This method never throws; any failure while sending is reported as "down".
     *
     * @param comm the communicator used to send the message
     *
     * @return <code>true</code> if the communicator can send the message; <code>false</code>
     *         if the remote endpoint is down
     */
    private boolean ping(RemoteCommunicator comm) {
        IdentifyCommand identifyCommand = new IdentifyCommand();
        this.agent.getClientCommandSender().preprocessCommand(identifyCommand);

        try {
            CommandResponse response = comm.sendWithoutCallbacks(identifyCommand);

            // there is a special case when we might get a response back but it should be considered "server down".
            // that is: when the server replies with a NotProcessedException response
            return response != null && !(response.getException() instanceof NotProcessedException);
        } catch (Throwable ignored) {
            // deliberately best-effort: whatever went wrong, we could not reach the server, so it is "down"
            return false;
        }
    }
}