// org.xins.server.CheckLinks Maven / Gradle / Ivy
/*
* $Id: CheckLinks.java,v 1.43 2011/04/16 15:48:02 agoubard Exp $
*
* See the COPYRIGHT file for redistribution and use restrictions.
*/
package org.xins.server;
import java.io.IOException;
import java.net.ConnectException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpOptions;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.params.CoreConnectionPNames;
import org.w3c.dom.Element;
import org.xins.common.MandatoryArgumentChecker;
import org.xins.common.Utils;
import org.xins.common.service.Descriptor;
import org.xins.common.service.TargetDescriptor;
import org.xins.common.xml.ElementBuilder;
/**
 * Checks all the links in the given list of {@link Descriptor}s and builds
 * a {@link FunctionResult}. It connects to each link in the
 * {@link TargetDescriptor}s of the {@link Descriptor}s list using a
 * {@link URLChecker} and calculates the total links count and the
 * total links failures. The returned {@link FunctionResult} contains
 * information about the total links checked, the failures and the details.
 *
 * <p>The following example uses {@link CheckLinks} to get the
 * {@link FunctionResult}.
 *
 * <pre>
 * FunctionResult result = CheckLinks.checkLinks(descriptorList);
 *
 * // Returns parameters
 * result.getParameters();
 * </pre>
 *
 * @version $Revision: 1.43 $ $Date: 2011/04/16 15:48:02 $
 * @author Tauseef Rehman
 */
class CheckLinks {
/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is an {@code UnknownHostException}.
 */
private static final String UNKNOWN_HOST = "UnknownHost";

/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is a {@code ConnectTimeoutException}. It is also intended
 * for exceptions whose message starts with "Connect timed out", which is
 * the message given to the exception created when a time-out is enforced
 * on a checker thread that is still running.
 */
private static final String CONNECTION_TIMEOUT = "ConnectionTimeout";

/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is a {@code ConnectException}.
 */
private static final String CONNECTION_REFUSAL = "ConnectionRefusal";

/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is a {@code SocketTimeoutException}.
 */
private static final String SOCKET_TIMEOUT = "SocketTimeout";

/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is an {@code IOException} not covered by one of the more
 * specific failure messages.
 */
private static final String OTHER_IO_ERROR = "OtherIOError";

/**
 * The failure message to be added in the {@code FunctionResult} when
 * the exception is an unknown {@code Exception}.
 */
private static final String OTHER_FAILURE = "OtherFailure";

/**
 * The success message to be added in the {@code FunctionResult}.
 */
private static final String SUCCESS = "Success";

/**
 * HTTP retry handler that does not allow any retries (retry count of zero).
 * Declared {@code final}: it is a shared constant that is only read.
 */
private static final DefaultHttpRequestRetryHandler NO_RETRIES = new DefaultHttpRequestRetryHandler(0, false);
/**
 * Checks all the links in the {@code TargetDescriptor}s inside the
 * {@code Descriptor} list and builds a {@code FunctionResult}.
 * First gets all the {@link TargetDescriptor}s from the
 * {@link Descriptor}s list, then creates and starts one {@link URLChecker}
 * thread per {@link TargetDescriptor}. It waits for the threads for at most
 * the biggest total time-out of all targets, enforces a time-out on the
 * threads that are still running after that, and finally builds the
 * {@link FunctionResult}.
 *
 * <p>The returned {@link FunctionResult} contains one <em>check</em>
 * element per link that was checked, plus the parameters
 * <em>linkCount</em> (number of links checked) and <em>errorCount</em>
 * (number of links that were not checked successfully).
 *
 * @param descriptors
 *    the list of {@link Descriptor}s defined in the runtime properties,
 *    cannot be {@code null}.
 *
 * @return
 *    the constructed {@link FunctionResult} object, never
 *    {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code descriptors == null}.
 */
static FunctionResult checkLinks(List<? extends Descriptor> descriptors)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("descriptors", descriptors);

    // Stays empty when there is nothing to check
    List<URLChecker> threads = new ArrayList<URLChecker>();

    if (!descriptors.isEmpty()) {

        // Get all the targets from the descriptor list
        List<TargetDescriptor> targetDescriptors = getTargetDescriptors(descriptors);

        // Create the thread for each target and run them
        threads = createAndRunUrlCheckers(targetDescriptors);

        // Get the biggest time-out from all the targets
        int timeout = getBiggestTimeout(targetDescriptors);

        // Wait till all the threads finish their execution or timed out.
        waitTillThreadsRunning(threads, timeout);

        // Confirm all threads have finished their execution.
        confirmThreadsStopped(threads);
    }

    // Start building the result. With an empty thread list no element is
    // added and the error count is zero.
    FunctionResult builder = new FunctionResult();
    int errorCount = addCheckElements(builder, threads);

    builder.param("linkCount", String.valueOf(threads.size()));
    builder.param("errorCount", String.valueOf(errorCount));

    return builder;
}
/**
 * Creates a list of {@code TargetDescriptor}s from the given
 * {@code Descriptor}s list. Each {@link Descriptor} in the list provides an
 * iterator over its {@link TargetDescriptor}s; all of them are added, in
 * iteration order, to the returned list.
 *
 * @param descriptors
 *    the list of {@link Descriptor}s, cannot be {@code null}.
 *
 * @return
 *    the constructed {@link TargetDescriptor}s list, never
 *    {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code descriptors == null}.
 */
private static List<TargetDescriptor> getTargetDescriptors(List<? extends Descriptor> descriptors)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("descriptors", descriptors);

    List<TargetDescriptor> targetDescriptors = new ArrayList<TargetDescriptor>();

    // Flatten: every descriptor contributes all of its target descriptors.
    for (Descriptor descriptor : descriptors) {

        // The element type of the iterator is not visible here, so the
        // explicit cast of the original code is kept.
        Iterator<?> targetIterator = descriptor.iterateTargets();
        while (targetIterator.hasNext()) {
            targetDescriptors.add((TargetDescriptor) targetIterator.next());
        }
    }

    return targetDescriptors;
}
/**
 * Creates and runs a thread for each {@code TargetDescriptor} in the
 * given list. A {@link URLChecker} thread is created for each
 * {@link TargetDescriptor} and started immediately; the thread tries to
 * connect to the URL provided in the {@link TargetDescriptor}. Each thread
 * is added to the returned list, in the order of the given list.
 *
 * @param targetDescriptors
 *    the list of {@link TargetDescriptor}s which needs to be checked,
 *    cannot be {@code null}.
 *
 * @return
 *    the constructed {@link URLChecker}s list, never {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code targetDescriptors == null}.
 */
private static List<URLChecker> createAndRunUrlCheckers(List<TargetDescriptor> targetDescriptors)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("targetDescriptors", targetDescriptors);

    List<URLChecker> threads = new ArrayList<URLChecker>(targetDescriptors.size());

    // Iterate over all target descriptors
    for (TargetDescriptor target : targetDescriptors) {

        // Create a thread for the target descriptor and start it
        URLChecker urlThread = new URLChecker(target);
        urlThread.start();

        // Store the thread just started in the list
        threads.add(urlThread);
    }

    return threads;
}
/**
 * Returns the biggest total time-out of all the {@code TargetDescriptor}s
 * in the given list. The caller uses this value as the overall limit for
 * waiting on the {@link URLChecker} threads.
 *
 * @param targetDescriptors
 *    the list of {@link TargetDescriptor}s, cannot be {@code null}.
 *
 * @return
 *    the biggest total time-out from the list, or {@code -1} if the list
 *    is empty or no target descriptor reports a total time-out of at least
 *    {@code -1}.
 *
 * @throws IllegalArgumentException
 *    if {@code targetDescriptors == null}.
 */
private static int getBiggestTimeout(List<TargetDescriptor> targetDescriptors)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("targetDescriptors", targetDescriptors);

    int biggestTimeout = -1;

    // Keep the maximum; the time-out of each target is read only once
    for (TargetDescriptor target : targetDescriptors) {
        biggestTimeout = Math.max(biggestTimeout, target.getTotalTimeOut());
    }

    return biggestTimeout;
}
/**
 * Waits until every thread has finished or the overall time-out has
 * elapsed, whichever comes first. The threads are joined one after the
 * other; the time already spent waiting is subtracted from the time-out
 * used for the next thread, so the total waiting time is bounded by
 * {@code timeout}.
 *
 * <p>Note that {@link Thread#join(long)} interprets a time-out of
 * {@code 0} as "wait forever": with {@code timeout == 0} this method
 * blocks until the first thread ends and then returns.
 *
 * @param threads
 *    the list of {@link URLChecker} threads, cannot be {@code null}.
 *
 * @param timeout
 *    the overall time-out for the {@link URLChecker} threads, in
 *    milliseconds.
 *
 * @throws IllegalArgumentException
 *    if {@code threads == null}, or (raised by {@link Thread#join(long)})
 *    if {@code timeout} is negative while {@code threads} is not empty.
 */
private static void waitTillThreadsRunning(List<URLChecker> threads, int timeout)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("threads", threads);

    // Storing the time approximately when the first thread was started
    long startTime = System.currentTimeMillis();
    long threadTimeout = timeout;

    try {
        for (URLChecker urlThread : threads) {
            urlThread.join(threadTimeout);

            // The next thread only gets what is left of the overall
            // time-out after the time which has already passed.
            long timePassed = System.currentTimeMillis() - startTime;
            threadTimeout = timeout - timePassed;

            // Nothing left: the total time-out interval has passed, so
            // stop waiting. Threads that are still running are dealt with
            // by the caller.
            if (threadTimeout <= 0) {
                return;
            }
        }
    } catch (InterruptedException exception) {

        // Restore the interrupt status so that code further up the call
        // stack can still observe the interruption.
        Thread.currentThread().interrupt();

        // Another thread interrupting the current thread should never
        // happen, so log a programming error and throw the resulting
        // exception.
        throw Utils.logProgrammingError(exception);
    }
}
/**
 * Confirms that each {@code URLChecker} has finished its execution.
 * For every thread that is still alive a time-out is enforced on the
 * checker and the URL is logged; the thread itself is left running and is
 * ignored from then on.
 *
 * @param threads
 *    the list of {@link URLChecker} threads, cannot be {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code threads == null}.
 */
private static void confirmThreadsStopped(List<URLChecker> threads)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("threads", threads);

    for (URLChecker urlThread : threads) {

        // Check if thread is still alive.
        if (urlThread.isAlive()) {

            // Enforce a time-out for the thread and log it.
            urlThread.enforceTimeout();
            Log.log_3505(urlThread.getURL());
        }
    }
}
/**
 * Adds one <em>check</em> element per checked URL to the given
 * {@code FunctionResult}. For every {@link URLChecker} thread in the list
 * an element is created with the attributes <em>url</em>,
 * <em>duration</em> and <em>result</em>, and appended to the data element
 * of the passed {@link FunctionResult}.
 *
 * @param builder
 *    the {@link FunctionResult} where the result is added, cannot be
 *    {@code null}.
 *
 * @param threads
 *    the list of {@link URLChecker} threads, cannot be {@code null}.
 *
 * @return
 *    the total number of URLs without success.
 *
 * @throws IllegalArgumentException
 *    if {@code builder == null || threads == null}.
 */
private static int addCheckElements(FunctionResult builder, List<URLChecker> threads)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("builder", builder, "threads", threads);

    int errorCount = 0;

    // Iterate over the threads of target descriptors and create the check element.
    for (URLChecker urlThread : threads) {
        Element check = builder.getDataElementBuilder().createElement("check");
        check.setAttribute("url", urlThread.getURL());
        check.setAttribute("duration", Long.toString(urlThread.getDuration()));
        check.setAttribute("result", getResult(urlThread));
        builder.getDataElement().appendChild(check);

        if (!urlThread.getSuccess()) {
            errorCount++;
        }
    }

    return errorCount;
}
/**
 * Determines the result text for a {@code URLChecker} thread that has run.
 * A successful checker yields the success message; for a failed checker
 * the exception it recorded determines the text.
 *
 * @param urlThread
 *    the {@link URLChecker} thread for which the result value is to be
 *    determined, cannot be {@code null}.
 *
 * @return
 *    the result message, never {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code urlThread == null || urlThread.hasRun() == false}.
 */
private static String getResult(URLChecker urlThread)
throws IllegalArgumentException {

    // Check preconditions
    MandatoryArgumentChecker.check("urlThread", urlThread);
    boolean finished = urlThread.hasRun();
    if (!finished) {
        throw new IllegalArgumentException("urlThread().hasRun() == false");
    }

    // A failed check is classified by the exception that was recorded
    if (!urlThread.getSuccess()) {
        return getResult(urlThread.getException(), urlThread.getURL());
    }

    return SUCCESS;
}
/**
 * Maps the exception that made a URL check fail to a result message and
 * logs the outcome. The classification is, in this order:
 *
 * <ul>
 * <li>{@code UnknownHostException}: unknown host;
 * <li>{@code ConnectTimeoutException}, or any exception whose message
 *     starts with "Connect timed out": connection time-out;
 * <li>{@code ConnectException}: connection refusal;
 * <li>{@code SocketTimeoutException}: socket time-out;
 * <li>any other {@code IOException}: other I/O error;
 * <li>anything else: other failure.
 * </ul>
 *
 * @param exception
 *    the {@link Throwable} that occurred while checking the URL, cannot be
 *    {@code null}.
 *
 * @param url
 *    the url which threw the exception, cannot be {@code null}.
 *
 * @return
 *    the result message, never {@code null}.
 *
 * @throws IllegalArgumentException
 *    if {@code exception == null || url == null}.
 */
private static String getResult(Throwable exception, String url)
throws IllegalArgumentException {

    // Check preconditions.
    MandatoryArgumentChecker.check("exception", exception, "url", url);

    // A time-out can also be recognised by its message. This must be
    // tested before the ConnectException branch, otherwise a
    // ConnectException carrying this message is reported as a refusal.
    String message = exception.getMessage();
    boolean timedOutMessage = message != null
                           && message.startsWith("Connect timed out");

    String result;

    // DNS error, unknown host name
    if (exception instanceof UnknownHostException) {
        result = UNKNOWN_HOST;

    // Connection time-out
    } else if (exception instanceof ConnectTimeoutException || timedOutMessage) {
        result = CONNECTION_TIMEOUT;

    // Connection refused
    } else if (exception instanceof ConnectException) {
        result = CONNECTION_REFUSAL;

    // Socket time-out
    } else if (exception instanceof java.net.SocketTimeoutException) {
        result = SOCKET_TIMEOUT;

    // Other I/O error
    } else if (exception instanceof IOException) {
        result = OTHER_IO_ERROR;

    // Other error, apparently not an I/O error
    } else {
        result = OTHER_FAILURE;
    }

    // Log the result and exception.
    Log.log_3502(exception, url, result);

    return result;
}
/**
 * Creates a new {@code CheckLinks} object. The constructor is private and
 * is not called anywhere in this class, which offers its functionality
 * through static methods only.
 */
private CheckLinks() {
// empty: there is no state to initialize
}
/**
 * Tries to connect to the URL provided in a {@code TargetDescriptor}.
 * Runs as a separate thread. The URL is contacted by sending a request
 * with the HTTP {@code OPTIONS} method. The checker also measures the
 * total time the check took.
 *
 * <p>The outcome is recorded exactly once: either by the thread itself
 * when the check completes, or by {@link #enforceTimeout()} when the
 * caller gives up waiting. Whichever happens first wins; the other is
 * ignored. Callers should read the outcome only after the thread has
 * terminated or after calling {@link #enforceTimeout()}.
 *
 * <p>The following example shows how a {@code URLChecker} is used for a
 * given {@code TargetDescriptor target}:
 *
 * <pre>
 * URLChecker urlThread = new URLChecker(target);
 * urlThread.start();
 * urlThread.join();
 *
 * String    url      = urlThread.getURL();
 * long      duration = urlThread.getDuration();
 * boolean   success  = urlThread.getSuccess();
 * Throwable exception = success ? null : urlThread.getException();
 * </pre>
 *
 * @version $Revision: 1.43 $ $Date: 2011/04/16 15:48:02 $
 * @author Tauseef Rehman
 */
private static final class URLChecker extends Thread {

    /**
     * Guards the one-time recording of the outcome, which can be attempted
     * both by this thread and by the thread calling
     * {@link #enforceTimeout()}. A dedicated object is used instead of the
     * thread instance itself because {@link Thread#join(long)} uses the
     * monitor of the thread instance.
     */
    private final Object _lock = new Object();

    /**
     * The target descriptor for which the URL needs to be checked. Never
     * {@code null}.
     */
    private final TargetDescriptor _targetDescriptor;

    /**
     * The URL to be checked. Never {@code null}.
     */
    private final String _url;

    /**
     * The exception thrown when accessing the URL. Can be
     * {@code null} if the {@code URLChecker} has not run yet, or
     * if there was no error.
     */
    private Throwable _exception;

    /**
     * The result of the URL check. Is {@code true} if the
     * {@code URLChecker} has run and was successful. If either of
     * these conditions is not met, then {@code false}.
     */
    private boolean _success;

    /**
     * The time taken to check the URL, in milliseconds. Initially
     * {@code -1}; a non-negative value marks this checker as having run.
     */
    private long _duration;

    /**
     * The status code returned when the URL was called. {@code -1} as long
     * as no response with a status line has been recorded.
     */
    private int _statusCode;

    /**
     * Constructs a new {@code URLChecker} for the specified target
     * descriptor.
     *
     * @param targetDescriptor
     *    the {@link TargetDescriptor}, whose URL needs to be checked,
     *    cannot be {@code null}.
     *
     * @throws IllegalArgumentException
     *    if {@code targetDescriptor == null}.
     */
    public URLChecker(TargetDescriptor targetDescriptor)
    throws IllegalArgumentException {

        // Check preconditions
        MandatoryArgumentChecker.check("targetDescriptor", targetDescriptor);

        // Initialize fields
        _targetDescriptor = targetDescriptor;
        _url              = targetDescriptor.getURL();
        _duration         = -1;
        _statusCode       = -1;

        // Check postconditions
        if (_url == null) {
            throw Utils.logProgrammingError("_url == null");
        }
    }

    /**
     * Runs this thread. It tries to connect to the URL provided in the
     * {@link TargetDescriptor} by executing a request with the HTTP
     * {@code OPTIONS} method, without retries and with the socket and
     * connection time-outs of the target descriptor. Afterwards the
     * outcome (success flag, status code, exception and duration) is
     * recorded, unless a time-out has been enforced on this checker in
     * the meantime.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has already run.
     */
    @Override
    public void run() throws IllegalStateException {

        // Check preconditions
        if (hasRun()) {
            throw new IllegalStateException("This URLChecker for URL: "
                                          + _url + " has already run.");
        }

        // Logging the start of this thread.
        Log.log_3503(_url,
                     _targetDescriptor.getTotalTimeOut(),
                     _targetDescriptor.getConnectionTimeOut(),
                     _targetDescriptor.getSocketTimeOut());

        // Register current time, to compute total duration later
        long startTime = System.currentTimeMillis();

        // The outcome is collected in locals and published in one step below
        boolean   success    = false;
        int       statusCode = -1;
        Throwable failure    = null;

        HttpRequestBase optionsMethod = null;
        try {
            DefaultHttpClient client = new DefaultHttpClient();

            // Set the socket time-out for the URL.
            client.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT, _targetDescriptor.getSocketTimeOut());

            // Set the connection time-out for the URL.
            client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, _targetDescriptor.getConnectionTimeOut());
            client.setHttpRequestRetryHandler(NO_RETRIES);

            // An OPTIONS request asks for the communication options of the
            // resource without implying a resource action or initiating a
            // resource retrieval.
            optionsMethod = new HttpOptions(_url);

            // Execute the request and remember the status code, if any.
            HttpResponse response = client.execute(optionsMethod);
            if (response != null && response.getStatusLine() != null) {
                statusCode = response.getStatusLine().getStatusCode();
            }

            // Successfully executed.
            success = true;

        } catch (Throwable exception) {

            // Remember the exception; the execution failed.
            failure = exception;

        } finally {

            // The request object does not exist if creating the client or
            // the request itself failed; calling abort() on null would kill
            // this thread before any outcome is recorded.
            if (optionsMethod != null) {
                optionsMethod.abort();
            }
        }

        // Calculate the total time taken to check the URL.
        long duration = System.currentTimeMillis() - startTime;

        // Publish the outcome, unless a time-out was enforced meanwhile.
        // The duration is assigned last because it marks the checker as
        // having run.
        synchronized (_lock) {
            if (!hasRun()) {
                _exception  = failure;
                _success    = success;
                _statusCode = statusCode;
                _duration   = duration;
            }
        }

        // Logging the stopping of this thread.
        Log.log_3504(_url, duration);
    }

    /**
     * Checks if this {@code URLChecker} has already run, which is the case
     * once a non-negative duration has been recorded.
     *
     * @return
     *    {@code true} if this {@code URLChecker} has already run,
     *    or {@code false} otherwise.
     */
    boolean hasRun() {
        return (_duration >= 0);
    }

    /**
     * Checks if this {@code URLChecker} has already run and if not,
     * throws an exception.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    private void assertHasRun() throws IllegalStateException {
        if (!hasRun()) {
            String message = "This URLChecker has not run yet. URL: \"" + _url + "\".";
            throw new IllegalStateException(message);
        }
    }

    /**
     * Returns the total time it took to check the URL.
     *
     * @return
     *    the total duration in milliseconds, never negative.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    public long getDuration() throws IllegalStateException {
        assertHasRun();
        return _duration;
    }

    /**
     * Returns the flag indicating if the URL was connected successfully.
     *
     * @return
     *    {@code true} if the check was successful, {@code false} if it
     *    failed or a time-out was enforced.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    public boolean getSuccess() throws IllegalStateException {
        assertHasRun();
        return _success;
    }

    /**
     * Returns the status code of the method execution.
     *
     * @return
     *    the status code returned when the URL was called, or {@code -1}
     *    if no response with a status line was recorded.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    public int getStatusCode() throws IllegalStateException {
        assertHasRun();
        return _statusCode;
    }

    /**
     * Returns the URL which was connected.
     *
     * @return
     *    the URL, never {@code null}.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    public String getURL() throws IllegalStateException {
        assertHasRun();
        return _url;
    }

    /**
     * Returns the exception thrown while trying to connect to the URL.
     *
     * @return
     *    the exception, can be {@code null}.
     *
     * @throws IllegalStateException
     *    if this {@code URLChecker} has not run yet.
     */
    public Throwable getException() throws IllegalStateException {
        assertHasRun();
        return _exception;
    }

    /**
     * Enforces a time-out on the {@code URLChecker} thread. Actually
     * the thread is allowed to keep running and is ignored. If no outcome
     * has been recorded yet, the duration is set to the connection
     * time-out value of the target descriptor and a new
     * {@link ConnectException} is stored as the failure. If the checker has
     * already recorded an outcome, this method does nothing.
     */
    public void enforceTimeout() {
        synchronized (_lock) {
            if (!hasRun()) {

                // Set the duration as was defined for connection time-out
                _duration = _targetDescriptor.getConnectionTimeOut();

                // Create a new ConnectException.
                _exception = new ConnectException("Connect timed out");

                // XXX: Currently it is observed that mostly the URLs which are
                //      expected to throw a ConnectTimeoutException keep on
                //      running, but we need to take care of the situation when
                //      the thread is still active because of some other reason.
            }
        }
    }
}
}