org.apache.maven.doxia.linkcheck.DefaultLinkCheck Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of doxia-linkcheck Show documentation
Show all versions of doxia-linkcheck Show documentation
Doxia linkcheck is a tool to check the validity of links
The newest version!
package org.apache.maven.doxia.linkcheck;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;
/**
* The main bean to be called whenever a set of documents should have their links checked.
*
* @author Ben Walding
* @author Carlos Sanchez
* @author Arnaud Heritier
* @author Vincent Siveton
* @version $Id: DefaultLinkCheck.java 1002602 2010-09-29 13:09:25Z ltheussl $
*
* @plexus.component role="org.apache.maven.doxia.linkcheck.LinkCheck" role-hint="default"
*/
public final class DefaultLinkCheck
implements LinkCheck
{
/** Log. */
private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );
/** One MegaByte. */
private static final long MEG = 1024 * 1024;
/** The basedir to check. */
private File basedir;
/** Linkcheck Cache. */
private File linkCheckCache;
/**
* To exclude some links. Could contains a link, i.e. http://maven.apache.org
,
* or pattern links i.e. http://maven.apache.org/**/*.html
*/
private String[] excludedLinks = null;
/** To exclude some pages. */
private String[] excludedPages = null;
/**
* Excluded http errors only in on line mode.
*
* @see {@link HttpStatus} for all defined values.
*/
private int[] excludedHttpStatusErrors = null;
/**
* Excluded http warnings only in on line mode.
*
* @see {@link HttpStatus} for all defined values.
*/
private int[] excludedHttpStatusWarnings = null;
/** Online mode. */
private boolean online;
/** Bean enncapsuling some https parameters */
private HttpBean http;
/** Internal LinkValidatorManager. */
private LinkValidatorManager lvm = null;
/** Report output file for xml document. */
private File reportOutput;
/** Report output encoding for the xml document, UTF-8 by default. */
private String reportOutputEncoding = "UTF-8";
/** The base URL for links that start with '/'. */
private String baseURL;
/** The encoding used to process files, UTF-8 by default. */
private String encoding = ReaderFactory.UTF_8;
// ----------------------------------------------------------------------
// Public methods
// ----------------------------------------------------------------------
/**
 * {@inheritDoc}
 * <p>
 * Stores the directory that {@link #execute()} scans for pages; it must be set before executing.
 */
public void setBasedir( File base )
{
    basedir = base;
}
/**
 * {@inheritDoc}
 * <p>
 * The value is handed to the online HTTP validator when the default validator manager is built.
 */
public void setBaseURL( String url )
{
    baseURL = url;
}
/**
 * {@inheritDoc}
 * <p>
 * Stores the given array as is (no copy is made); <code>null</code> means no error status is excluded.
 */
public void setExcludedHttpStatusErrors( int[] excl )
{
    excludedHttpStatusErrors = excl;
}
/**
 * {@inheritDoc}
 * <p>
 * Stores the given array as is (no copy is made); <code>null</code> means no warning status is excluded.
 */
public void setExcludedHttpStatusWarnings( int[] excl )
{
    excludedHttpStatusWarnings = excl;
}
/**
 * {@inheritDoc}
 * <p>
 * The array is passed to the link validator manager when the default manager is created.
 */
public void setExcludedLinks( String[] excl )
{
    excludedLinks = excl;
}
/**
 * {@inheritDoc}
 * <p>
 * These patterns are appended to the default excludes when the base directory is scanned.
 */
public void setExcludedPages( String[] excl )
{
    excludedPages = excl;
}
/**
 * {@inheritDoc}
 * <p>
 * The bean is given to the online HTTP validator when the default validator manager is built.
 */
public void setHttp( HttpBean http )
{
    // The parameter shadows the field, so the qualifier is required here.
    this.http = http;
}
/**
 * {@inheritDoc}
 * <p>
 * {@link #execute()} asks the validator manager to load its cache from this file and to save it back.
 */
public void setLinkCheckCache( File cacheFile )
{
    linkCheckCache = cacheFile;
}
/**
 * {@inheritDoc}
 * <p>
 * Decides whether the default validator manager gets the online or the offline HTTP validator.
 */
public void setOnline( boolean onLine )
{
    online = onLine;
}
/**
 * {@inheritDoc}
 * <p>
 * When left <code>null</code>, {@link #execute()} logs a warning and no report file is written.
 */
public void setReportOutput( File file )
{
    reportOutput = file;
}
/**
 * {@inheritDoc}
 * <p>
 * The value becomes the model encoding of the {@link LinkcheckModel} built by {@link #execute()}.
 */
public void setReportOutputEncoding( String encoding )
{
    // Note: the parameter is named like the (unrelated) input-encoding field; only the
    // report encoding is updated here.
    this.reportOutputEncoding = encoding;
}
/**
 * {@inheritDoc}
 * <p>
 * Runs the whole check in order: load the validator cache, scan and validate every page below the
 * base directory, write the XML report (when a report file is configured) and save the cache again.
 * Memory consumption is logged at debug level between the steps.
 *
 * @return the model holding one entry per checked file
 * @throws LinkCheckException if the cache cannot be loaded or saved, the base directory cannot be
 *             scanned, or the report cannot be written
 * @throws NullPointerException if no base directory has been set
 */
public LinkcheckModel execute()
    throws LinkCheckException
{
    if ( basedir == null )
    {
        LOG.error( "No base directory specified!" );

        throw new NullPointerException( "The basedir can't be null!" );
    }

    // A missing report file is not fatal: the model is still built and returned.
    if ( reportOutput == null )
    {
        LOG.warn( "No output file specified! Results will not be written!" );
    }

    LinkcheckModel report = new LinkcheckModel();
    report.setModelEncoding( reportOutputEncoding );
    report.setFiles( new LinkedList() );

    displayMemoryConsumption();

    LinkValidatorManager manager = getLinkValidatorManager();
    try
    {
        manager.loadCache( linkCheckCache );
    }
    catch ( IOException ioe )
    {
        throw new LinkCheckException( "Could not load cache: " + ioe.getMessage(), ioe );
    }

    displayMemoryConsumption();

    LOG.info( "Begin to check links in files..." );
    try
    {
        findAndCheckFiles( basedir, report );
    }
    catch ( IOException ioe )
    {
        throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), ioe );
    }
    LOG.info( "Links checked." );

    displayMemoryConsumption();

    try
    {
        createDocument( report );
    }
    catch ( IOException ioe )
    {
        throw new LinkCheckException( "Could not write the linkcheck document: " + ioe.getMessage(), ioe );
    }

    try
    {
        manager.saveCache( linkCheckCache );
    }
    catch ( IOException ioe )
    {
        throw new LinkCheckException( "Could not save cache: " + ioe.getMessage(), ioe );
    }

    displayMemoryConsumption();

    return report;
}
/**
 * {@inheritDoc}
 * <p>
 * The name is validated with {@link Charset#forName(String)} before it is stored, so an unusable
 * encoding is rejected here rather than when files are read.
 *
 * @throws IllegalArgumentException if the encoding is empty or names a charset that is not supported
 */
public void setEncoding( String encoding )
{
    if ( StringUtils.isEmpty( encoding ) )
    {
        throw new IllegalArgumentException( "encoding is required" );
    }

    try
    {
        Charset.forName( encoding );
    }
    catch ( UnsupportedCharsetException e )
    {
        // Keep the original exception as the cause (via initCause(), as createDocument() does)
        // instead of silently dropping it.
        IllegalArgumentException iae =
            new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" );
        iae.initCause( e );
        throw iae;
    }

    this.encoding = encoding;
}
// ----------------------------------------------------------------------
// Private methods
// ----------------------------------------------------------------------
/**
 * Tells whether the online mode has been switched on.
 *
 * @return <code>true</code> if links are checked in online mode
 */
private boolean isOnline()
{
    return online;
}
/**
* Returns the excluded links.
* Could contains a link, i.e. http://maven.apache.org/
,
* or pattern links i.e. http://maven.apache.org/**/*.html
*
* @return String[]
*/
private String[] getExcludedLinks()
{
return this.excludedLinks;
}
/**
 * Builds the effective exclude patterns: the default excludes supplied by <code>FileUtils</code>
 * followed by the configured excluded pages, if any.
 *
 * @return the patterns joined with commas, never <code>null</code>
 */
private String getExcludedPages()
{
    LinkedList patterns = new LinkedList( FileUtils.getDefaultExcludesAsList() );

    if ( excludedPages != null )
    {
        for ( int i = 0; i < excludedPages.length; i++ )
        {
            patterns.add( excludedPages[i] );
        }
    }

    return StringUtils.join( patterns.iterator(), "," );
}
/**
 * Supplies the fixed include patterns used when scanning the base directory: files with an
 * <code>html</code> or <code>htm</code> extension at any depth.
 *
 * @return the include patterns joined with a comma, never <code>null</code>
 */
private String getIncludedPages()
{
    final String htmlPagePatterns = "**/*.html,**/*.htm";

    return htmlPagePatterns;
}
/**
 * Gives the HTTP status codes (i.e. <code>404</code>) whose error results are to be ignored.
 *
 * @return the codes as set on this bean, possibly <code>null</code>
 */
private int[] getExcludedHttpStatusErrors()
{
    return excludedHttpStatusErrors;
}
/**
 * Gives the HTTP status codes (i.e. <code>301</code>) whose warning results are to be ignored.
 *
 * @return the codes as set on this bean, possibly <code>null</code>
 */
private int[] getExcludedHttpStatusWarnings()
{
    return excludedHttpStatusWarnings;
}
/**
 * Gives access to the LinkValidatorManager, building the default one the first time it is requested.
 * Later calls return the same instance.
 *
 * @return the LinkValidatorManager, never <code>null</code>
 */
private LinkValidatorManager getLinkValidatorManager()
{
    if ( lvm != null )
    {
        return lvm;
    }

    initDefaultLinkValidatorManager();

    return lvm;
}
/**
 * Replaces the current LinkValidatorManager with a freshly configured default one.
 * <p>
 * Validators are registered in this order: file, HTTP (online or offline, depending on the
 * online flag) and mailto. Excluded links are applied when some have been configured.
 */
private void initDefaultLinkValidatorManager()
{
    LinkValidatorManager manager = new LinkValidatorManager();
    // Publish the new manager first, exactly as before, then finish configuring it.
    this.lvm = manager;

    if ( getExcludedLinks() != null )
    {
        manager.setExcludedLinks( getExcludedLinks() );
    }

    manager.addLinkValidator( new FileLinkValidator( encoding ) );

    if ( !isOnline() )
    {
        manager.addLinkValidator( new OfflineHTTPLinkValidator() );
    }
    else
    {
        OnlineHTTPLinkValidator onlineValidator = new OnlineHTTPLinkValidator( http );

        if ( baseURL != null )
        {
            onlineValidator.setBaseURL( baseURL );
        }

        manager.addLinkValidator( onlineValidator );
    }

    manager.addLinkValidator( new MailtoLinkValidator() );
}
/**
 * Collects the files below the given directory that match the include patterns and are not
 * excluded, and checks each of them, adding the outcome to the model.
 *
 * @param base the base directory to traverse
 * @param model the model receiving one entry per checked file
 * @throws IOException if the directory cannot be scanned
 */
private void findAndCheckFiles( File base, LinkcheckModel model )
    throws IOException
{
    for ( Iterator pages = FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ).iterator();
          pages.hasNext(); )
    {
        File page = (File) pages.next();

        checkFile( page, model );
    }
}
/**
 * Checks a single file and adds the outcome to the model.
 * <p>
 * The file is recorded with its absolute path and with a path relative to the base directory
 * (always using <code>/</code> as separator). A progress message is logged at info level every
 * 100 files.
 *
 * @param file the file whose links are validated
 * @param model the model receiving the resulting entry
 */
private void checkFile( File file, LinkcheckModel model )
{
    if ( LOG.isDebugEnabled() )
    {
        LOG.debug( " File - " + file );
    }

    String absolutePath = file.getAbsolutePath();
    String basePath = this.basedir.getAbsolutePath();

    String fileRelativePath = absolutePath;
    if ( absolutePath.startsWith( basePath ) )
    {
        String remainder = absolutePath.substring( basePath.length() );
        if ( basePath.endsWith( File.separator ) )
        {
            // The base path already ends with a separator (e.g. a file system root): nothing
            // more to strip, otherwise the first character of the file name would be lost.
            fileRelativePath = remainder;
        }
        else if ( remainder.startsWith( File.separator ) )
        {
            fileRelativePath = remainder.substring( File.separator.length() );
        }
        // Otherwise the two paths only share a name prefix (or are equal); keep the absolute path.
    }
    fileRelativePath = fileRelativePath.replace( '\\', '/' );

    LinkcheckFile linkcheckFile = new LinkcheckFile();
    linkcheckFile.setAbsolutePath( absolutePath );
    linkcheckFile.setRelativePath( fileRelativePath );

    check( linkcheckFile );

    model.addFile( linkcheckFile );

    if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() )
    {
        LOG.info( "Found " + model.getFiles().size() + " files so far." );
    }
}
/**
 * Validates a linkcheck file.
 * <p>
 * The success and failure counters of the file are reset, every link found in the file is
 * validated, and one result per link is added to the file. If the file cannot be parsed, a single
 * <code>PARSE FAILURE</code> result is added instead.
 * <p>
 * An error or warning result counts as valid when it is an HTTP result whose status code is one of
 * the configured excluded codes. Codes are compared as numbers, so an excluded code only matches
 * that exact status.
 *
 * @param linkcheckFile the linkcheckFile object to validate
 */
private void check( LinkcheckFile linkcheckFile )
{
    linkcheckFile.setSuccessful( 0 );
    linkcheckFile.setUnsuccessful( 0 );

    if ( LOG.isDebugEnabled() )
    {
        LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
    }

    final File source = new File( linkcheckFile.getAbsolutePath() );

    final Set hrefs;
    try
    {
        hrefs = LinkMatcher.match( source, encoding );
    }
    catch ( Throwable t )
    {
        // We catch Throwable, because there is a chance that the domReader will throw
        // a stack overflow exception for some files
        LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
        LOG.debug( t.getMessage(), t );

        LinkcheckFileResult failure = new LinkcheckFileResult();
        failure.setStatus( "PARSE FAILURE" );
        failure.setTarget( "N/A" );
        linkcheckFile.addResult( failure );

        return;
    }

    // Go through the accessor so that the manager is created if this is the first use.
    LinkValidatorManager manager = getLinkValidatorManager();

    for ( Iterator iter = hrefs.iterator(); iter.hasNext(); )
    {
        String href = (String) iter.next();

        LinkValidationResult result = manager.validateLink( new LinkValidationItem( source, href ) );

        LinkcheckFileResult lcr = new LinkcheckFileResult();
        lcr.setTarget( href );
        lcr.setErrorMessage( result.getErrorMessage() );

        boolean successful;
        switch ( result.getStatus() )
        {
            case LinkcheckFileResult.VALID_LEVEL:
                successful = true;
                lcr.setStatus( LinkcheckFileResult.VALID );
                break;
            case LinkcheckFileResult.ERROR_LEVEL:
                successful = isExcludedHttpStatus( result, getExcludedHttpStatusErrors() );
                lcr.setStatus( successful ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR );
                break;
            case LinkcheckFileResult.WARNING_LEVEL:
                successful = isExcludedHttpStatus( result, getExcludedHttpStatusWarnings() );
                lcr.setStatus( successful ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING );
                break;
            case LinkcheckFileResult.UNKNOWN_LEVEL:
            default:
                successful = false;
                lcr.setStatus( LinkcheckFileResult.UNKNOWN );
                break;
        }

        if ( successful )
        {
            linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
        }
        else
        {
            linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
        }

        // At some point we won't want to store valid links. The tests require that we do at present.
        linkcheckFile.addResult( lcr );
    }
}

/**
 * Tells whether a validation result carries an HTTP status code that has been excluded.
 *
 * @param result the validation result to inspect
 * @param excludedCodes the excluded HTTP status codes, may be <code>null</code>
 * @return <code>true</code> only if the result is an HTTP result with a positive status code that
 *         is equal to one of the excluded codes
 */
private static boolean isExcludedHttpStatus( LinkValidationResult result, int[] excludedCodes )
{
    if ( excludedCodes == null || !( result instanceof HTTPLinkValidationResult ) )
    {
        return false;
    }

    int statusCode = ( (HTTPLinkValidationResult) result ).getHttpStatusCode();
    if ( statusCode <= 0 )
    {
        return false;
    }

    for ( int i = 0; i < excludedCodes.length; i++ )
    {
        if ( excludedCodes[i] == statusCode )
        {
            return true;
        }
    }

    return false;
}
/**
 * Logs the used and the total memory of the JVM, in megabytes, at debug level.
 * Does nothing when debug logging is disabled.
 */
private void displayMemoryConsumption()
{
    if ( !LOG.isDebugEnabled() )
    {
        return;
    }

    Runtime runtime = Runtime.getRuntime();
    long usedMegs = ( runtime.totalMemory() - runtime.freeMemory() ) / MEG;
    long totalMegs = runtime.totalMemory() / MEG;

    LOG.debug( "Memory: " + usedMegs + "M/" + totalMegs + "M" );
}
/**
 * Create the XML document from the currently available details.
 * <p>
 * Nothing is written when no report output file has been configured. The parent directory of the
 * report file is created when it does not exist yet.
 *
 * @param model the model to serialize
 * @throws IOException if the report directory cannot be created, or the document cannot be written
 */
private void createDocument( LinkcheckModel model )
    throws IOException
{
    if ( this.reportOutput == null )
    {
        return;
    }

    File dir = this.reportOutput.getParentFile();
    // mkdirs() also returns false when the directory already exists (or was created concurrently),
    // hence the isDirectory() checks around it.
    if ( dir != null && !dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory() )
    {
        throw new IOException( "Could not create the report directory: " + dir.getAbsolutePath() );
    }

    Writer writer = null;
    LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
    try
    {
        writer = WriterFactory.newXmlWriter( this.reportOutput );
        xpp3Writer.write( writer, model );
    }
    catch ( IllegalStateException e )
    {
        // Surface serializer failures as IOException, keeping the original exception as the cause.
        IOException ioe =
            new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '"
                + encoding + "'." );
        ioe.initCause( e );
        throw ioe;
    }
    finally
    {
        IOUtil.close( writer );
    }
}
/**
 * Converts every number of the given array to its decimal string form, keeping the order.
 *
 * @param array the numbers to convert, must not be <code>null</code>
 * @return a new array of the same length holding the string form of each number
 * @throws IllegalArgumentException if the array is <code>null</code>
 */
private static String[] toStringArray( int[] array )
{
    if ( array == null )
    {
        throw new IllegalArgumentException( "array could not be null" );
    }

    final int length = array.length;
    String[] texts = new String[length];

    int index = 0;
    while ( index < length )
    {
        texts[index] = Integer.toString( array[index] );
        index++;
    }

    return texts;
}
}