org.apache.maven.doxia.linkcheck.DefaultLinkCheck Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of doxia-linkcheck Show documentation
Doxia linkcheck is a tool to check the validity of links
The newest version!
package org.apache.maven.doxia.linkcheck;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;

/**
 * The main bean to be called whenever a set of documents should have their links checked.
 *
 * @author Ben Walding
 * @author Carlos Sanchez
 * @author Arnaud Heritier
 * @author Vincent Siveton
 * @version $Id: DefaultLinkCheck.java 1002602 2010-09-29 13:09:25Z ltheussl $
 *
 * @plexus.component role="org.apache.maven.doxia.linkcheck.LinkCheck" role-hint="default"
 */
public final class DefaultLinkCheck
    implements LinkCheck
{
    /** Log. */
    private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );

    /** One MegaByte. */
    private static final long MEG = 1024 * 1024;

    /** The basedir to check. */
    private File basedir;

    /** Linkcheck Cache. */
    private File linkCheckCache;

    /**
     * To exclude some links. Could contains a link, i.e. http://maven.apache.org,
     * or pattern links i.e. http://maven.apache.org/**/*.html
     */
    private String[] excludedLinks = null;

    /** To exclude some pages. */
    private String[] excludedPages = null;

    /**
     * Excluded http errors only in on line mode.
     *
     * @see {@link HttpStatus} for all defined values.
     */
    private int[] excludedHttpStatusErrors = null;

    /**
     * Excluded http warnings only in on line mode.
     *
     * @see {@link HttpStatus} for all defined values.
     */
    private int[] excludedHttpStatusWarnings = null;

    /** Online mode. */
    private boolean online;

    /** Bean enncapsuling some https parameters */
    private HttpBean http;

    /** Internal LinkValidatorManager. */
    private LinkValidatorManager lvm = null;

    /** Report output file for xml document. */
    private File reportOutput;

    /** Report output encoding for the xml document, UTF-8 by default. */
    private String reportOutputEncoding = "UTF-8";

    /** The base URL for links that start with '/'. */
    private String baseURL;

    /** The encoding used to process files, UTF-8 by default. */
    private String encoding = ReaderFactory.UTF_8;

    // ----------------------------------------------------------------------
    // Public methods
    // ----------------------------------------------------------------------

    /** {@inheritDoc} */
    public void setBasedir( File base )
    {
        this.basedir = base;
    }

    /** {@inheritDoc} */
    public void setBaseURL( String url )
    {
        this.baseURL = url;
    }

    /** {@inheritDoc} */
    public void setExcludedHttpStatusErrors( int[] excl )
    {
        this.excludedHttpStatusErrors = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedHttpStatusWarnings( int[] excl )
    {
        this.excludedHttpStatusWarnings = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedLinks( String[] excl )
    {
        this.excludedLinks = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedPages( String[] excl )
    {
        this.excludedPages = excl;
    }

    /** {@inheritDoc} */
    public void setHttp( HttpBean http )
    {
        this.http = http;
    }

    /** {@inheritDoc} */
    public void setLinkCheckCache( File cacheFile )
    {
        this.linkCheckCache = cacheFile;
    }

    /** {@inheritDoc} */
    public void setOnline( boolean onLine )
    {
        this.online = onLine;
    }

    /** {@inheritDoc} */
    public void setReportOutput( File file )
    {
        this.reportOutput = file;
    }

    /** {@inheritDoc} */
    public void setReportOutputEncoding( String encoding )
    {
        this.reportOutputEncoding = encoding;
    }

    /** {@inheritDoc} */
    public LinkcheckModel execute()
        throws LinkCheckException
    {
        if ( this.basedir == null )
        {
            LOG.error( "No base directory specified!" );

            throw new NullPointerException( "The basedir can't be null!" );
        }

        if ( this.reportOutput == null )
        {
            LOG.warn( "No output file specified! Results will not be written!" );
        }

        LinkcheckModel model = new LinkcheckModel();
        model.setModelEncoding( reportOutputEncoding );
        model.setFiles( new LinkedList() );

        displayMemoryConsumption();

        LinkValidatorManager validator = getLinkValidatorManager();
        try
        {
            validator.loadCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        LOG.info( "Begin to check links in files..." );

        try
        {
            findAndCheckFiles( this.basedir, model );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), e );
        }

        LOG.info( "Links checked." );

        displayMemoryConsumption();

        try
        {
            createDocument( model );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e );
        }

        try
        {
            validator.saveCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        return model;
    }

    /** {@inheritDoc} */
    public void setEncoding( String encoding )
    {
        if ( StringUtils.isEmpty( encoding ) )
        {
            throw new IllegalArgumentException( "encoding is required" );
        }
        try
        {
            Charset.forName( encoding );
        }
        catch ( UnsupportedCharsetException e )
        {
            throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" );
        }

        this.encoding = encoding;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Whether links are checked in online mode.
     *
     * @return online
     */
    private boolean isOnline()
    {
        return this.online;
    }

    /**
     * Returns the excluded links.
     * Could contains a link, i.e. http://maven.apache.org/,
     * or pattern links i.e. http://maven.apache.org/**/*.html
     *
     * @return String[]
     */
    private String[] getExcludedLinks()
    {
        return this.excludedLinks;
    }

    /**
     * Gets the comma separated list of effective exclude patterns.
     *
     * @return The comma separated list of effective exclude patterns, never null.
     */
    private String getExcludedPages()
    {
        LinkedList patternList = new LinkedList( FileUtils.getDefaultExcludesAsList() );

        if ( excludedPages != null )
        {
            patternList.addAll( Arrays.asList( excludedPages ) );
        }

        return StringUtils.join( patternList.iterator(), "," );
    }

    /**
     * Gets the comma separated list of effective include patterns.
     *
     * @return The comma separated list of effective include patterns, never null.
     */
    private String getIncludedPages()
    {
        return "**/*.html,**/*.htm";
    }

    /**
     * Returns the excluded HTTP errors, i.e. 404.
     *
     * @return int[]
     * @see {@link HttpStatus} for all possible values.
     */
    private int[] getExcludedHttpStatusErrors()
    {
        return this.excludedHttpStatusErrors;
    }

    /**
     * Returns the excluded HTTP warnings, i.e. 301.
     *
     * @return int[]
     * @see {@link HttpStatus} for all possible values.
     */
    private int[] getExcludedHttpStatusWarnings()
    {
        return this.excludedHttpStatusWarnings;
    }

    /**
     * Returns the LinkValidatorManager.
     * If this hasn't been set before with {@link #setLinkValidatorManager(LinkValidatorManager)}
     * a default LinkValidatorManager will be returned.
     *
     * @return the LinkValidatorManager
     */
    private LinkValidatorManager getLinkValidatorManager()
    {
        if ( this.lvm == null )
        {
            initDefaultLinkValidatorManager();
        }

        return this.lvm;
    }

    /**
     * Intializes the current LinkValidatorManager to a default value.
     */
    private void initDefaultLinkValidatorManager()
    {
        this.lvm = new LinkValidatorManager();

        if ( getExcludedLinks() != null )
        {
            this.lvm.setExcludedLinks( getExcludedLinks() );
        }

        this.lvm.addLinkValidator( new FileLinkValidator( encoding ) );

        if ( isOnline() )
        {
            OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http );

            if ( this.baseURL != null )
            {
                olv.setBaseURL( baseURL );
            }

            this.lvm.addLinkValidator( olv );
        }
        else
        {
            this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() );
        }

        this.lvm.addLinkValidator( new MailtoLinkValidator() );
    }

    /**
     * Recurses through the given base directory and adds/checks
     * files to the model that pass through the current filter.
     *
     * @param base the base directory to traverse.
     */
    private void findAndCheckFiles( File base, LinkcheckModel model )
        throws IOException
    {
        Iterator files = FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ).iterator();

        while( files.hasNext() )
        {
            checkFile( (File) files.next(), model );
        }
    }

    private void checkFile( File file, LinkcheckModel model )
    {
        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( " File - " + file );
        }

        String fileRelativePath = file.getAbsolutePath();

        if ( fileRelativePath.startsWith( this.basedir.getAbsolutePath() ) )
        {
            fileRelativePath = fileRelativePath.substring( this.basedir.getAbsolutePath().length() + 1 );
        }

        fileRelativePath = fileRelativePath.replace( '\\', '/' );

        LinkcheckFile linkcheckFile = new LinkcheckFile();
        linkcheckFile.setAbsolutePath( file.getAbsolutePath() );
        linkcheckFile.setRelativePath( fileRelativePath );

        check( linkcheckFile );

        model.addFile( linkcheckFile );

        if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() )
        {
            LOG.info( "Found " + model.getFiles().size() + " files so far." );
        }
    }

    /**
     * Validates a linkcheck file.
     *
     * @param linkcheckFile the linkcheckFile object to validate
     */
    private void check( LinkcheckFile linkcheckFile )
    {
        linkcheckFile.setSuccessful( 0 );

        linkcheckFile.setUnsuccessful( 0 );

        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
        }

        final Set hrefs;

        try
        {
            hrefs = LinkMatcher.match( new File( linkcheckFile.getAbsolutePath() ), encoding );
        }
        catch ( Throwable t )
        {
            // We catch Throwable, because there is a chance that the domReader will throw
            // a stack overflow exception for some files

            LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
            LOG.debug( t.getMessage(), t );

            LinkcheckFileResult lcr = new LinkcheckFileResult();

            lcr.setStatus( "PARSE FAILURE" );

            lcr.setTarget( "N/A" );

            linkcheckFile.addResult( lcr );

            return;
        }

        String href;
        LinkcheckFileResult lcr;
        LinkValidationItem lvi;
        LinkValidationResult result;

        for ( Iterator iter = hrefs.iterator(); iter.hasNext(); )
        {
            href = (String) iter.next();

            lcr = new LinkcheckFileResult();
            lvi = new LinkValidationItem( new File( linkcheckFile.getAbsolutePath() ), href );
            result = lvm.validateLink( lvi );
            lcr.setTarget( href );
            lcr.setErrorMessage( result.getErrorMessage() );

            switch ( result.getStatus() )
            {
                case LinkcheckFileResult.VALID_LEVEL:
                    linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );

                    lcr.setStatus( LinkcheckFileResult.VALID );

                    // At some point we won't want to store valid links. The tests require that we do at present.
                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.ERROR_LEVEL:
                    boolean ignoredError = false;
                    if ( result instanceof HTTPLinkValidationResult )
                    {
                        HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;

                        if ( httpResult.getHttpStatusCode() > 0
                            && getExcludedHttpStatusErrors() != null
                            && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
                                                       toStringArray( getExcludedHttpStatusErrors() ) ) >= 0 )
                        {
                            ignoredError = true;
                        }
                    }

                    if ( ignoredError )
                    {
                        linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
                    }
                    else
                    {
                        linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
                    }

                    lcr.setStatus( ignoredError ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR );

                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.WARNING_LEVEL:
                    boolean ignoredWarning = false;
                    if ( result instanceof HTTPLinkValidationResult )
                    {
                        HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;

                        if ( httpResult.getHttpStatusCode() > 0
                            && getExcludedHttpStatusWarnings() != null
                            && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
                                                       toStringArray( getExcludedHttpStatusWarnings() ) ) >= 0 )
                        {
                            ignoredWarning = true;
                        }
                    }

                    if ( ignoredWarning )
                    {
                        linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
                    }
                    else
                    {
                        linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
                    }

                    lcr.setStatus( ignoredWarning ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING );

                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.UNKNOWN_LEVEL:
                default:
                    linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );

                    lcr.setStatus( LinkcheckFileResult.UNKNOWN );

                    linkcheckFile.addResult( lcr );

                    break;
            }
        }

        href = null;
        lcr = null;
        lvi = null;
        result = null;
    }

    /**
     * Writes some memory data to the log (if debug enabled).
     */
    private void displayMemoryConsumption()
    {
        if ( LOG.isDebugEnabled() )
        {
            Runtime r = Runtime.getRuntime();
            LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG
                + "M" );
        }
    }

    /**
     * Create the XML document from the currently available details.
     *
     * @throws IOException if any
     */
    private void createDocument( LinkcheckModel model )
        throws IOException
    {
        if ( this.reportOutput == null )
        {
            return;
        }

        File dir = this.reportOutput.getParentFile();
        if ( dir != null )
        {
            dir.mkdirs();
        }

        Writer writer = null;
        LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
        try
        {
            writer = WriterFactory.newXmlWriter( this.reportOutput );
            xpp3Writer.write( writer, model );
        }
        catch ( IllegalStateException e )
        {
            IOException ioe =
                new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '"
                    + encoding + "'." );
            ioe.initCause( e );
            throw ioe;
        }
        finally
        {
            IOUtil.close( writer );
        }

        dir = null;
    }

    private static String[] toStringArray( int[] array )
    {
        if ( array == null )
        {
            throw new IllegalArgumentException( "array could not be null" );
        }

        String[] result = new String[array.length];
        for ( int i = 0; i < array.length; i++ )
        {
            result[i] = String.valueOf( array[i] );
        }
        return result;
    }
}