All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.maven.doxia.linkcheck.DefaultLinkCheck Maven / Gradle / Ivy

There is a newer version: 1.2
Show newest version
package org.apache.maven.doxia.linkcheck;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;

/**
 * The main bean to be called whenever a set of documents should have their links checked.
 *
 * @author Ben Walding
 * @author Carlos Sanchez
 * @author Arnaud Heritier
 * @author Vincent Siveton
 * @version $Id: DefaultLinkCheck.java 709894 2008-11-02 16:42:06Z hboutemy $
 *
 * @plexus.component role="org.apache.maven.doxia.linkcheck.LinkCheck" role-hint="default"
 */
public final class DefaultLinkCheck
    implements LinkCheck
{
    /** Log. */
    private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );

    /** FilenameFilter. */
    private static final FilenameFilter CUSTOM_FF = new DefaultLinkCheck.CustomFilenameFilter();

    /** One MegaByte. */
    private static final long MEG = 1024 * 1024;

    /** The basedir to check. */
    private File basedir;

    /** Linkcheck Cache. */
    private File linkCheckCache;

    /**
     * To exclude some links. Could contains a link, i.e. http://maven.apache.org,
     * or pattern links i.e. http://maven.apache.org/**/*.html
     */
    private String[] excludedLinks = null;

    /** To exclude some pages. */
    private String[] excludedPages = null;

    /**
     * Excluded http errors only in on line mode.
     *
     * @see {@link HttpStatus} for all defined values.
     */
    private int[] excludedHttpStatusErrors = null;

    /**
     * Excluded http warnings only in on line mode.
     *
     * @see {@link HttpStatus} for all defined values.
     */
    private int[] excludedHttpStatusWarnings = null;

    /** Online mode. */
    private boolean online;

    /** Bean enncapsuling some https parameters */
    private HttpBean http;

    /** Internal LinkValidatorManager. */
    private LinkValidatorManager lvm = null;

    /** Report output file for xml document. */
    private File reportOutput;

    /** Report output encoding for the xml document, UTF-8 by default. */
    private String reportOutputEncoding = "UTF-8";

    /** The base URL for links that start with '/'. */
    private String baseURL;

    /** The linkcheck model */
    private LinkcheckModel model = new LinkcheckModel();

    /** The encoding used to process files, UTF-8 by default. */
    private String encoding = ReaderFactory.UTF_8;

    // ----------------------------------------------------------------------
    // Public methods
    // ----------------------------------------------------------------------

    /** {@inheritDoc} */
    public void setBasedir( File base )
    {
        this.basedir = base;
    }

    /** {@inheritDoc} */
    public void setBaseURL( String url )
    {
        this.baseURL = url;
    }

    /** {@inheritDoc} */
    public void setExcludedHttpStatusErrors( int[] excl )
    {
        this.excludedHttpStatusErrors = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedHttpStatusWarnings( int[] excl )
    {
        this.excludedHttpStatusWarnings = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedLinks( String[] excl )
    {
        this.excludedLinks = excl;
    }

    /** {@inheritDoc} */
    public void setExcludedPages( String[] excl )
    {
        this.excludedPages = excl;
    }

    /** {@inheritDoc} */
    public void setHttp( HttpBean http )
    {
        this.http = http;
    }

    /** {@inheritDoc} */
    public void setLinkCheckCache( File cacheFile )
    {
        this.linkCheckCache = cacheFile;
    }

    /** {@inheritDoc} */
    public void setOnline( boolean onLine )
    {
        this.online = onLine;
    }

    /** {@inheritDoc} */
    public void setReportOutput( File file )
    {
        this.reportOutput = file;
    }

    /** {@inheritDoc} */
    public void setReportOutputEncoding( String encoding )
    {
        this.reportOutputEncoding = encoding;
    }

    /** {@inheritDoc} */
    public LinkcheckModel execute()
        throws LinkCheckException
    {
        if ( this.basedir == null )
        {
            if ( LOG.isErrorEnabled() )
            {
                LOG.error( "No base directory specified!" );
            }

            throw new NullPointerException( "The basedir can't be null!" );
        }

        if ( this.reportOutput == null )
        {
            if ( LOG.isWarnEnabled() )
            {
                LOG.warn( "No output file specified! Results will not be written!" );
            }
        }

        model = new LinkcheckModel();
        model.setModelEncoding( reportOutputEncoding );
        model.setFiles( new LinkedList() );

        displayMemoryConsumption();

        LinkValidatorManager validator = getLinkValidatorManager();
        try
        {
            validator.loadCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        if ( LOG.isInfoEnabled() )
        {
            LOG.info( "Begin to check links in files..." );
        }

        findAndCheckFiles( this.basedir );

        if ( LOG.isInfoEnabled() )
        {
            LOG.info( "Links checked." );
        }

        displayMemoryConsumption();

        try
        {
            createDocument();
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e );
        }

        try
        {
            validator.saveCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        return model;
    }

    /** {@inheritDoc} */
    public void setEncoding( String encoding )
    {
        if ( StringUtils.isEmpty( encoding ) )
        {
            throw new IllegalArgumentException( "encoding is required" );
        }
        try
        {
            Charset.forName( encoding );
        }
        catch ( UnsupportedCharsetException e )
        {
            throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" );
        }

        this.encoding = encoding;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Whether links are checked in online mode.
     *
     * @return online
     */
    private boolean isOnline()
    {
        return this.online;
    }

    /**
     * Get the base directory for the files to be linkchecked.
     *
     * @return the base directory
     */
    private File getBasedir()
    {
        return this.basedir;
    }

    /**
     * Returns the excluded links.
     * Could contains a link, i.e. http://maven.apache.org/,
     * or pattern links i.e. http://maven.apache.org/**/*.html
     *
     * @return String[]
     */
    private String[] getExcludedLinks()
    {
        return this.excludedLinks;
    }

    /**
     * Returns the excluded pages.
     *
     * @return String[]
     */
    private String[] getExcludedPages()
    {
        return this.excludedPages;
    }

    /**
     * Returns the excluded HTTP errors, i.e. 404.
     *
     * @return int[]
     * @see {@link HttpStatus} for all possible values.
     */
    private int[] getExcludedHttpStatusErrors()
    {
        return this.excludedHttpStatusErrors;
    }

    /**
     * Returns the excluded HTTP warnings, i.e. 301.
     *
     * @return int[]
     * @see {@link HttpStatus} for all possible values.
     */
    private int[] getExcludedHttpStatusWarnings()
    {
        return this.excludedHttpStatusWarnings;
    }

    /**
     * The model.
     *
     * @return the model.
     */
    private LinkcheckModel getModel()
    {
        return model;
    }

    /**
     * Sets the LinkValidatorManager.
     *
     * @param validator the LinkValidatorManager to set
     */
    private void setLinkValidatorManager( LinkValidatorManager validator )
    {
        this.lvm = validator;
    }

    /**
     * Returns the LinkValidatorManager.
     * If this hasn't been set before with {@link #setLinkValidatorManager(LinkValidatorManager)}
     * a default LinkValidatorManager will be returned.
     *
     * @return the LinkValidatorManager
     */
    private LinkValidatorManager getLinkValidatorManager()
    {
        if ( this.lvm == null )
        {
            initDefaultLinkValidatorManager();
        }

        return this.lvm;
    }

    /**
     * Intializes the current LinkValidatorManager to a default value.
     */
    private void initDefaultLinkValidatorManager()
    {
        this.lvm = new LinkValidatorManager();

        if ( getExcludedLinks() != null )
        {
            this.lvm.setExcludedLinks( getExcludedLinks() );
        }

        this.lvm.addLinkValidator( new FileLinkValidator( encoding ) );

        if ( isOnline() )
        {
            OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http );

            if ( this.baseURL != null )
            {
                olv.setBaseURL( baseURL );
            }

            this.lvm.addLinkValidator( olv );
        }
        else
        {
            this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() );
        }

        this.lvm.addLinkValidator( new MailtoLinkValidator() );
    }

    /**
     * Recurses through the given base directory and adds/checks
     * files to the model that pass through the current filter.
     *
     * @param base the base directory to traverse.
     */
    private void findAndCheckFiles( File base )
    {
        File[] f = base.listFiles( CUSTOM_FF );

        if ( f != null )
        {
            File file;
            for ( int i = 0; i < f.length; i++ )
            {
                file = f[i];

                if ( file.isDirectory() )
                {
                    findAndCheckFiles( file );
                }
                else
                {
                    if ( LOG.isDebugEnabled() )
                    {
                        LOG.debug( " File - " + file );
                    }

                    if ( getExcludedPages() != null )
                    {
                        String diff = StringUtils.difference( getBasedir().getAbsolutePath(), file.getAbsolutePath() );
                        if ( diff.startsWith( File.separator ) )
                        {
                            diff = diff.substring( 1 );
                        }

                        if ( Arrays.binarySearch( getExcludedPages(), diff ) >= 0 )
                        {
                            if ( LOG.isDebugEnabled() )
                            {
                                LOG.debug( " Ignored analysis of " + file );
                            }

                            continue;
                        }
                    }

                    String fileRelativePath = file.getAbsolutePath();
                    if ( fileRelativePath.startsWith( this.basedir.getAbsolutePath() ) )
                    {
                        fileRelativePath = fileRelativePath.substring( this.basedir.getAbsolutePath().length() + 1 );
                    }
                    fileRelativePath = fileRelativePath.replace( '\\', '/' );

                    LinkcheckFile linkcheckFile = new LinkcheckFile();
                    linkcheckFile.setAbsolutePath( file.getAbsolutePath() );
                    linkcheckFile.setRelativePath( fileRelativePath );

                    check( linkcheckFile );

                    model.addFile( linkcheckFile );

                    if ( model.getFiles().size() % 100 == 0 )
                    {
                        if ( LOG.isInfoEnabled() )
                        {
                            LOG.info( "Found " + model.getFiles().size() + " files so far." );
                        }
                    }
                }
            }

            file = null;
        }

        f = null;
    }

    /**
     * Validates a linkcheck file.
     *
     * @param linkcheckFile the linkcheckFile object to validate
     */
    private void check( LinkcheckFile linkcheckFile )
    {
        linkcheckFile.setSuccessful( 0 );

        linkcheckFile.setUnsuccessful( 0 );

        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
        }

        final Set hrefs;

        try
        {
            hrefs = LinkMatcher.match( new File( linkcheckFile.getAbsolutePath() ), encoding );
        }
        catch ( Throwable t )
        {
            // We catch Throwable, because there is a chance that the domReader will throw
            // a stack overflow exception for some files

            if ( LOG.isDebugEnabled() )
            {
                LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]", t );
            }
            else
            {
                LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
            }

            LinkcheckFileResult lcr = new LinkcheckFileResult();

            lcr.setStatus( "PARSE FAILURE" );

            lcr.setTarget( "N/A" );

            linkcheckFile.addResult( lcr );

            return;
        }

        String href;
        LinkcheckFileResult lcr;
        LinkValidationItem lvi;
        LinkValidationResult result;

        for ( Iterator iter = hrefs.iterator(); iter.hasNext(); )
        {
            href = (String) iter.next();

            lcr = new LinkcheckFileResult();
            lvi = new LinkValidationItem( new File( linkcheckFile.getAbsolutePath() ), href );
            result = lvm.validateLink( lvi );
            lcr.setTarget( href );
            lcr.setErrorMessage( result.getErrorMessage() );

            switch ( result.getStatus() )
            {
                case LinkcheckFileResult.VALID_LEVEL:
                    linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );

                    lcr.setStatus( LinkcheckFileResult.VALID );

                    // At some point we won't want to store valid links. The tests require that we do at present.
                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.ERROR_LEVEL:
                    boolean ignoredError = false;
                    if ( result instanceof HTTPLinkValidationResult )
                    {
                        HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;


                        if ( httpResult.getHttpStatusCode() > 0 && getExcludedHttpStatusErrors() != null
                            && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
                                                       toStringArray( getExcludedHttpStatusErrors() ) ) >= 0 )
                        {
                            ignoredError = true;
                        }
                    }

                    if ( ignoredError )
                    {
                        linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
                    }
                    else
                    {
                        linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
                    }

                    lcr.setStatus( ignoredError ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR );

                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.WARNING_LEVEL:
                    boolean ignoredWarning = false;
                    if ( result instanceof HTTPLinkValidationResult )
                    {
                        HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;

                        if ( httpResult.getHttpStatusCode() > 0 && getExcludedHttpStatusWarnings() != null
                            && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
                                                       toStringArray( getExcludedHttpStatusWarnings() ) ) >= 0 )
                        {
                            ignoredWarning = true;
                        }
                    }

                    if ( ignoredWarning )
                    {
                        linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
                    }
                    else
                    {
                        linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
                    }

                    lcr.setStatus( ignoredWarning ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING );

                    linkcheckFile.addResult( lcr );

                    break;
                case LinkcheckFileResult.UNKNOWN_LEVEL:
                default:
                    linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );

                    lcr.setStatus( LinkcheckFileResult.UNKNOWN );

                    linkcheckFile.addResult( lcr );

                    break;
            }
        }

        href = null;
        lcr = null;
        lvi = null;
        result = null;
    }

    /**
     * Writes some memory data to the log (if debug enabled).
     */
    private void displayMemoryConsumption()
    {
        if ( LOG.isDebugEnabled() )
        {
            Runtime r = Runtime.getRuntime();
            LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG + "M" );
        }
    }

    /**
     * Create the XML document from the currently available details.
     *
     * @throws IOException if any
     */
    private void createDocument()
        throws IOException
    {
        if ( this.reportOutput == null )
        {
            return;
        }

        File dir = this.reportOutput.getParentFile();
        if ( dir != null )
        {
            dir.mkdirs();
        }

        Writer writer = null;
        LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
        try
        {
            writer = WriterFactory.newXmlWriter( this.reportOutput );
            xpp3Writer.write( writer, getModel() );
        }
        catch ( IllegalStateException e )
        {
            IOException ioe =
                new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '" + encoding
                    + "'." );
            ioe.initCause( e );
            throw ioe;
        }
        finally
        {
            IOUtil.close( writer );
        }

        dir = null;
    }

    private static String[] toStringArray( int[] array )
    {
        if ( array == null )
        {
            throw new IllegalArgumentException( "array could not be null" );
        }

        String[] result = new String[array.length];
        for ( int i = 0; i < array.length; i++ )
        {
            result[i] = String.valueOf( array[i] );
        }
        return result;
    }

    /** Custom FilenameFilter used to search html files */
    static class CustomFilenameFilter
        implements FilenameFilter
    {
        /** {@inheritDoc} */
        public boolean accept( File dir, String name )
        {
            File n = new File( dir, name );

            if ( n.isDirectory() )
            {
                return true;
            }

            if ( name.toLowerCase( Locale.ENGLISH ).endsWith( ".html" )
                || name.toLowerCase( Locale.ENGLISH ).endsWith( ".htm" ) )
            {
                return true;
            }

            return false;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy