All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.maven.doxia.DefaultConverter Maven / Gradle / Ivy

There is a newer version: 1.3
Show newest version
package org.apache.maven.doxia;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.BufferedInputStream;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.maven.doxia.logging.Log;
import org.apache.maven.doxia.logging.SystemStreamLog;
import org.apache.maven.doxia.parser.ParseException;
import org.apache.maven.doxia.parser.Parser;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkFactory;
import org.apache.maven.doxia.util.ConverterUtil;
import org.apache.maven.doxia.wrapper.InputFileWrapper;
import org.apache.maven.doxia.wrapper.InputReaderWrapper;
import org.apache.maven.doxia.wrapper.OutputFileWrapper;
import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
import org.codehaus.plexus.ContainerConfiguration;
import org.codehaus.plexus.DefaultContainerConfiguration;
import org.codehaus.plexus.DefaultPlexusContainer;
import org.codehaus.plexus.PlexusContainer;
import org.codehaus.plexus.PlexusContainerException;
import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.SelectorUtils;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;
import org.codehaus.plexus.util.xml.XmlStreamReader;
import org.codehaus.plexus.util.xml.XmlUtil;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

/**
 * Default implementation of Converter
 *
 * @author Vincent Siveton
 * @version $Id: DefaultConverter.java 784074 2009-06-12 11:14:35Z ltheussl $
 */
public class DefaultConverter
    implements Converter
{
    // ----------------------------------------------------------------------
    // Identifiers of the Doxia parsers that can be used as input format.
    // They are also used as file extension when scanning a directory and
    // when auto detecting the format of text based files.
    // ----------------------------------------------------------------------

    private static final String APT_PARSER = "apt";

    private static final String CONFLUENCE_PARSER = "confluence";

    private static final String DOCBOOK_PARSER = "docbook";

    private static final String FML_PARSER = "fml";

    private static final String TWIKI_PARSER = "twiki";

    private static final String XDOC_PARSER = "xdoc";

    private static final String XHTML_PARSER = "xhtml";

    /**
     * Supported input formats, i.e. the Doxia parsers this converter is able to look up.
     * The array order is also the order in which the formats are tried by the format auto detection.
     */
    public static final String[] SUPPORTED_FROM_FORMAT =
        { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER, FML_PARSER, TWIKI_PARSER, XDOC_PARSER, XHTML_PARSER };

    // ----------------------------------------------------------------------
    // Identifiers of the Doxia sinks that can be used as output format.
    // ----------------------------------------------------------------------

    private static final String APT_SINK = "apt";

    private static final String CONFLUENCE_SINK = "confluence";

    private static final String DOCBOOK_SINK = "docbook";

    private static final String FO_SINK = "fo";

    private static final String ITEXT_SINK = "itext";

    private static final String LATEX_SINK = "latex";

    private static final String RTF_SINK = "rtf";

    private static final String TWIKI_SINK = "twiki";

    private static final String XDOC_SINK = "xdoc";

    private static final String XHTML_SINK = "xhtml";

    /** Supported output formats, i.e. the Doxia sinks this converter is able to look up. */
    public static final String[] SUPPORTED_TO_FORMAT =
        { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK, ITEXT_SINK, LATEX_SINK, RTF_SINK, TWIKI_SINK, XDOC_SINK,
            XHTML_SINK };

    /**
     * Flag to pretty-print the generated files. It is only honoured by the file based conversion and only
     * for the fo, itext, xdoc and xhtml sinks; docbook (as input or output format) is skipped.
     */
    private boolean formatOutput;

    /**
     * Plexus container used to look up parsers and sink factories. It is created when a conversion
     * starts and disposed (and reset to <code>null</code>) when the conversion ends.
     */
    private PlexusContainer plexus;

    /** Doxia logger; stays <code>null</code> until one is injected or lazily created by <code>getLog()</code>. */
    private Log log;

    /**
     * {@inheritDoc}
     * <p>
     * The given logger is stored as is (no null check); it is the logger returned by {@link #getLog()}
     * as long as it is not <code>null</code>.
     * </p>
     */
    public void enableLogging( Log log )
    {
        this.log = log;
    }

    /**
     * Gives access to the logger of this converter.
     * When no logger has been injected yet, a {@link SystemStreamLog} is created, remembered and returned,
     * so the result is never <code>null</code>.
     *
     * @return the configured logger, or a newly created SystemStreamLog if none was configured.
     */
    protected Log getLog()
    {
        if ( log != null )
        {
            return log;
        }

        // lazily fall back to a logger writing to the system streams
        log = new SystemStreamLog();
        return log;
    }

    /**
     * {@inheritDoc}
     * <p>
     * A copy of {@link #SUPPORTED_FROM_FORMAT} is returned so that a caller modifying the result
     * (sorting it, replacing entries, ...) cannot alter the formats this converter accepts.
     * </p>
     *
     * @return a new array holding the supported input formats, never <code>null</code>.
     */
    public String[] getInputFormats()
    {
        return (String[]) SUPPORTED_FROM_FORMAT.clone();
    }

    /**
     * {@inheritDoc}
     * <p>
     * A copy of {@link #SUPPORTED_TO_FORMAT} is returned so that a caller modifying the result
     * (sorting it, replacing entries, ...) cannot alter the formats this converter accepts.
     * </p>
     *
     * @return a new array holding the supported output formats, never <code>null</code>.
     */
    public String[] getOutputFormats()
    {
        return (String[]) SUPPORTED_TO_FORMAT.clone();
    }

    /**
     * {@inheritDoc}
     * <p>
     * If the input denotes a regular file, this single file is converted. Otherwise the input is handled
     * as a directory: every file matching <code>**&#47;*.&lt;input format&gt;</code> (minus the default
     * excludes of {@link FileUtils}) is converted in turn. The Plexus container is started before the
     * conversion and always stopped afterwards.
     * </p>
     *
     * @throws IllegalArgumentException if <code>input</code> or <code>output</code> is <code>null</code>.
     */
    public void convert( InputFileWrapper input, OutputFileWrapper output )
        throws UnsupportedFormatException, ConverterException
    {
        if ( input == null )
        {
            throw new IllegalArgumentException( "input is required" );
        }
        if ( output == null )
        {
            throw new IllegalArgumentException( "output is required" );
        }

        try
        {
            startPlexusContainer();
        }
        catch ( PlexusContainerException e )
        {
            throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
        }

        try
        {
            // single file: nothing to scan
            if ( input.getFile().isFile() )
            {
                parse( input.getFile(), input.getEncoding(), input.getFormat(), output );
                return;
            }

            // anything else is scanned like a directory
            List candidates;
            try
            {
                String includes = "**/*." + input.getFormat();
                String excludes = StringUtils.join( FileUtils.getDefaultExcludes(), ", " );

                candidates = FileUtils.getFiles( input.getFile(), includes, excludes );
            }
            catch ( IOException e )
            {
                throw new ConverterException( "IOException: " + e.getMessage(), e );
            }
            catch ( IllegalStateException e )
            {
                throw new ConverterException( "IllegalStateException: " + e.getMessage(), e );
            }

            Iterator candidateIt = candidates.iterator();
            while ( candidateIt.hasNext() )
            {
                File candidate = (File) candidateIt.next();

                parse( candidate, input.getEncoding(), input.getFormat(), output );
            }
        }
        finally
        {
            // also reached through the early return above
            stopPlexusContainer();
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Looks up the parser for the input format and the sink factory for the output format, creates a sink
     * writing to the output stream of <code>output</code> and parses the reader of <code>input</code> into it.
     * The Plexus container is started before the conversion and always stopped afterwards.
     * </p>
     *
     * @throws IllegalArgumentException if <code>input</code> or <code>output</code> is <code>null</code>.
     */
    public void convert( InputReaderWrapper input, OutputStreamWrapper output )
        throws UnsupportedFormatException, ConverterException
    {
        if ( input == null )
        {
            throw new IllegalArgumentException( "input is required" );
        }
        if ( output == null )
        {
            throw new IllegalArgumentException( "output is required" );
        }

        try
        {
            startPlexusContainer();
        }
        catch ( PlexusContainerException e )
        {
            throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
        }

        try
        {
            Parser parser;
            try
            {
                parser = ConverterUtil.getParser( plexus, input.getFormat(), SUPPORTED_FROM_FORMAT );
                // use getLog() and not the raw field: the field is still null when enableLogging() was never called
                parser.enableLogging( getLog() );
            }
            catch ( ComponentLookupException e )
            {
                throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
            }

            if ( getLog().isDebugEnabled() )
            {
                getLog().debug( "Parser used: " + parser.getClass().getName() );
            }

            SinkFactory sinkFactory;
            try
            {
                sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
            }
            catch ( ComponentLookupException e )
            {
                throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
            }

            Sink sink;
            try
            {
                sink = sinkFactory.createSink( output.getOutputStream(), output.getEncoding() );
            }
            catch ( IOException e )
            {
                throw new ConverterException( "IOException: " + e.getMessage(), e );
            }
            // same reasoning as for the parser: never hand over a null logger
            sink.enableLogging( getLog() );

            if ( getLog().isDebugEnabled() )
            {
                getLog().debug( "Sink used: " + sink.getClass().getName() );
            }

            parse( parser, input.getReader(), sink );
        }
        finally
        {
            stopPlexusContainer();
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * The flag is read at the end of each file based conversion to decide whether the generated file
     * is pretty-printed.
     * </p>
     */
    public void setFormatOutput( boolean formatOutput )
    {
        this.formatOutput = formatOutput;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Converts one input file into the output described by <code>output</code>.
     * <p>
     * The input encoding and format are auto detected when requested, the target file is resolved
     * (creating missing directories), the input is parsed into a sink writing to the target file and,
     * if {@link #setFormatOutput(boolean)} was enabled, the generated XML is pretty-printed.
     * </p>
     *
     * @param inputFile a not null existing file.
     * @param inputEncoding a not null supported encoding or {@link InputFileWrapper#AUTO_ENCODING}
     * @param inputFormat  a not null supported format or {@link InputFileWrapper#AUTO_FORMAT}
     * @param output not null OutputFileWrapper object
     * @throws ConverterException if any
     * @throws UnsupportedFormatException if any
     */
    private void parse( File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output )
        throws ConverterException, UnsupportedFormatException
    {
        if ( getLog().isDebugEnabled() )
        {
            getLog().debug(
                            "Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
                                + inputEncoding + "' to '" + output.getFile().getAbsolutePath()
                                + "' with the encoding '" + output.getEncoding() + "'" );
        }

        if ( inputEncoding.equals( InputFileWrapper.AUTO_ENCODING ) )
        {
            inputEncoding = autoDetectEncoding( inputFile );
            if ( getLog().isDebugEnabled() )
            {
                getLog().debug( "Auto detect encoding: " + inputEncoding );
            }
        }

        if ( inputFormat.equals( InputFileWrapper.AUTO_FORMAT ) )
        {
            inputFormat = autoDetectFormat( inputFile, inputEncoding );
            if ( getLog().isDebugEnabled() )
            {
                getLog().debug( "Auto detect input format: " + inputFormat );
            }
        }

        Parser parser;
        try
        {
            parser = ConverterUtil.getParser( plexus, inputFormat, SUPPORTED_FROM_FORMAT );
            // use getLog() and not the raw field: the field is still null when enableLogging() was never called
            parser.enableLogging( getLog() );
        }
        catch ( ComponentLookupException e )
        {
            throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
        }

        File outputFile = resolveOutputFile( inputFile, output );

        Reader reader = openReader( parser, inputFile, inputEncoding );

        Sink sink;
        boolean sinkCreated = false;
        try
        {
            sink = createSink( output, outputFile, inputEncoding );
            sinkCreated = true;
        }
        finally
        {
            if ( !sinkCreated )
            {
                // parse( parser, reader, sink ) is never reached, so nobody else would close the reader
                IOUtil.close( reader );
            }
        }

        sink.enableLogging( getLog() );

        if ( getLog().isDebugEnabled() )
        {
            getLog().debug( "Sink used: " + sink.getClass().getName() );
        }

        // closes the reader and the sink, whatever happens
        parse( parser, reader, sink );

        // format all xml files excluding docbook which is buggy
        // TODO Add doc book format
        if ( formatOutput && !inputFormat.equals( DOCBOOK_PARSER ) )
        {
            String outputFormat = output.getFormat();
            if ( outputFormat.equals( FO_SINK ) || outputFormat.equals( ITEXT_SINK )
                || outputFormat.equals( XDOC_SINK ) || outputFormat.equals( XHTML_SINK ) )
            {
                prettyFormatXml( outputFile );
            }
        }
    }

    /**
     * Computes the file to write to and creates the directories it needs.
     * <p>
     * An existing directory, or a not existing path whose name has no extension, is handled as output
     * directory: the result is <code>&lt;input file name&gt;.&lt;output format&gt;</code> inside it.
     * Any other path is used as output file as is.
     * </p>
     *
     * @param inputFile the not null file being converted.
     * @param output the not null output description.
     * @return the file the sink has to write to.
     */
    private static File resolveOutputFile( File inputFile, OutputFileWrapper output )
    {
        File target = output.getFile();

        if ( target.exists() && target.isDirectory() )
        {
            return new File( target, inputFile.getName() + "." + output.getFormat() );
        }

        if ( !SelectorUtils.match( "**.*", target.getName() ) )
        {
            // assume it is a directory
            target.mkdirs();
            return new File( target, inputFile.getName() + "." + output.getFormat() );
        }

        // getParentFile() is null for a path without parent part (e.g. "out.xhtml"), so resolve it first
        File parent = target.getAbsoluteFile().getParentFile();
        if ( parent != null )
        {
            parent.mkdirs();
        }
        return target;
    }

    /**
     * Opens the input file: XML parsers get an XML reader (which detects the encoding by itself),
     * other parsers a reader using the given encoding, or the platform encoding if it is <code>null</code>.
     *
     * @param parser the not null parser which will consume the reader.
     * @param inputFile the not null file to read.
     * @param inputEncoding the encoding to use, could be null.
     * @return a new reader, to be closed by the caller.
     * @throws ConverterException if the file could not be opened.
     */
    private static Reader openReader( Parser parser, File inputFile, String inputEncoding )
        throws ConverterException
    {
        try
        {
            if ( inputEncoding == null )
            {
                return ReaderFactory.newPlatformReader( inputFile );
            }

            if ( parser.getType() == Parser.XML_TYPE )
            {
                return ReaderFactory.newXmlReader( inputFile );
            }

            return ReaderFactory.newReader( inputFile, inputEncoding );
        }
        catch ( IOException e )
        {
            throw new ConverterException( "IOException: " + e.getMessage(), e );
        }
    }

    /**
     * Creates the sink writing to <code>outputFile</code>. The output encoding is the one of
     * <code>output</code>, or the input encoding if it is empty or {@link OutputFileWrapper#AUTO_ENCODING}.
     *
     * @param output the not null output description.
     * @param outputFile the not null file to write to.
     * @param inputEncoding the encoding of the input file, used as fallback.
     * @return a new sink, to be closed by the caller.
     * @throws ConverterException if the sink factory could not be looked up or the sink could not be created.
     * @throws UnsupportedFormatException if the output format is not supported.
     */
    private Sink createSink( OutputFileWrapper output, File outputFile, String inputEncoding )
        throws ConverterException, UnsupportedFormatException
    {
        SinkFactory sinkFactory;
        try
        {
            sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
        }
        catch ( ComponentLookupException e )
        {
            throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
        }

        String outputEncoding;
        if ( StringUtils.isEmpty( output.getEncoding() )
            || output.getEncoding().equals( OutputFileWrapper.AUTO_ENCODING ) )
        {
            outputEncoding = inputEncoding;
        }
        else
        {
            outputEncoding = output.getEncoding();
        }

        OutputStream out = null;
        boolean created = false;
        try
        {
            out = new FileOutputStream( outputFile );
            Sink sink = sinkFactory.createSink( out, outputEncoding );
            created = true;
            return sink;
        }
        catch ( IOException e )
        {
            throw new ConverterException( "IOException: " + e.getMessage(), e );
        }
        finally
        {
            if ( !created )
            {
                // no sink took ownership of the stream, release the file handle here
                IOUtil.close( out );
            }
        }
    }

    /**
     * Pretty-prints the given XML file in place.
     *
     * @param outputFile the not null generated XML file.
     * @throws ConverterException if the file could not be read or written.
     */
    private static void prettyFormatXml( File outputFile )
        throws ConverterException
    {
        Reader r = null;
        Writer w = null;
        try
        {
            r = ReaderFactory.newXmlReader( outputFile );
            CharArrayWriter caw = new CharArrayWriter();
            XmlUtil.prettyFormat( r, caw );

            // release the read handle before the very same file is truncated for writing
            IOUtil.close( r );
            r = null;

            w = WriterFactory.newXmlWriter( outputFile );
            w.write( caw.toString() );
            // IOUtil.close() ignores failures, so flush here to get write errors reported
            w.flush();
        }
        catch ( IOException e )
        {
            throw new ConverterException( "IOException: " + e.getMessage(), e );
        }
        finally
        {
            IOUtil.close( r );
            IOUtil.close( w );
        }
    }

    /**
     * Parses the reader into the sink and releases both of them afterwards, whether the parsing
     * succeeded or not.
     *
     * @param parser not null
     * @param reader not null
     * @param sink not null
     * @throws ConverterException if the parser reported a ParseException
     */
    private void parse( Parser parser, Reader reader, Sink sink )
        throws ConverterException
    {
        try
        {
            parser.parse( reader, sink );
        }
        catch ( ParseException e )
        {
            throw new ConverterException( "ParseException: " + e.getMessage(), e );
        }
        finally
        {
            IOUtil.close( reader );
            try
            {
                sink.flush();
            }
            finally
            {
                // close the sink even if flushing failed, otherwise its output would stay open
                sink.close();
            }
        }
    }

    /**
     * Creates the Plexus container named "Doxia", unless one is already available.
     * The container context holds the absolute path of the current directory under the key "basedir".
     *
     * @throws PlexusContainerException if the container could not be created
     */
    private void startPlexusContainer()
        throws PlexusContainerException
    {
        if ( plexus == null )
        {
            Map containerContext = new HashMap();
            containerContext.put( "basedir", new File( "" ).getAbsolutePath() );

            ContainerConfiguration configuration = new DefaultContainerConfiguration();
            configuration.setName( "Doxia" );
            configuration.setContext( containerContext );

            plexus = new DefaultPlexusContainer( configuration );
        }
    }

    /**
     * Disposes the Plexus container and forgets it, so that the next conversion creates a new one.
     * Does nothing if no container is available.
     */
    private void stopPlexusContainer()
    {
        if ( plexus != null )
        {
            plexus.dispose();
            plexus = null;
        }
    }

    /**
     * Detects the encoding of the given file: XML files are asked for the encoding found by the
     * XML stream reader, any other file is analyzed by the ICU charset detector.
     *
     * @param f not null file
     * @return the detected encoding for f; for non XML files the name is upper-cased.
     * @throws IllegalArgumentException if f is not a file.
     * @throws UnsupportedOperationException if could not detect the file encoding, i.e. the file could not
     * be read (the I/O failure is then available as cause) or the detector found no matching charset.
     * @see XmlStreamReader#getEncoding()
     * @see CharsetDetector#detect()
     */
    private static String autoDetectEncoding( File f )
    {
        if ( !f.isFile() )
        {
            throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
                + "' is not a file, could not detect encoding." );
        }

        IOException failure = null;
        Reader reader = null;
        InputStream is = null;
        try
        {
            if ( XmlUtil.isXml( f ) )
            {
                reader = ReaderFactory.newXmlReader( f );
                return ( (XmlStreamReader) reader ).getEncoding();
            }

            is = new BufferedInputStream( new FileInputStream( f ) );
            CharsetDetector detector = new CharsetDetector();
            detector.setText( is );
            CharsetMatch match = detector.detect();

            // detect() gives null when no charset matches: report it below instead of failing with a NPE
            if ( match != null && match.getName() != null )
            {
                return match.getName().toUpperCase( Locale.ENGLISH );
            }
        }
        catch ( IOException e )
        {
            // reported below, as cause of the UnsupportedOperationException
            failure = e;
        }
        finally
        {
            IOUtil.close( reader );
            IOUtil.close( is );
        }

        StringBuffer msg = new StringBuffer();
        msg.append( "Could not detect the encoding for file: " );
        msg.append( f.getAbsolutePath() );
        msg.append( "\n Specify explicitly the encoding." );

        UnsupportedOperationException notDetected = new UnsupportedOperationException( msg.toString() );
        if ( failure != null )
        {
            notDetected.initCause( failure );
        }
        throw notDetected;
    }

    /**
     * Auto detect Doxia format for the given file depending:
     * <ul>
     * <li>the file name for TextMarkup based Doxia files (apt, confluence, twiki)</li>
     * <li>the file content, i.e. the first tag, for XMLMarkup based Doxia files (docbook, fml, xdoc, xhtml)</li>
     * </ul>
     * The supported formats are tried in the order of {@link #SUPPORTED_FROM_FORMAT}; the first match wins.
     *
     * @param f not null file
     * @param encoding a not null encoding; currently not used by the detection.
     * @return the detected format from f.
     * @throws IllegalArgumentException if f is not a file.
     * @throws UnsupportedOperationException if could not detect the Doxia format.
     */
    private static String autoDetectFormat( File f, String encoding )
    {
        if ( !f.isFile() )
        {
            throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
                + "' is not a file, could not detect format." );
        }

        // The first tag does not depend on the tried format: read the file at most once, and only
        // if no file name based detection matched before.
        String firstTag = null;
        boolean firstTagResolved = false;

        for ( int i = 0; i < SUPPORTED_FROM_FORMAT.length; i++ )
        {
            String supportedFromFormat = SUPPORTED_FROM_FORMAT[i];

            // Handle Doxia text files
            boolean textFormat =
                supportedFromFormat.equalsIgnoreCase( APT_PARSER )
                    || supportedFromFormat.equalsIgnoreCase( CONFLUENCE_PARSER )
                    || supportedFromFormat.equalsIgnoreCase( TWIKI_PARSER );
            if ( textFormat && isDoxiaFileName( f, supportedFromFormat ) )
            {
                return supportedFromFormat;
            }

            // Handle Doxia xml files
            if ( !firstTagResolved )
            {
                firstTag = getFirstTag( f );
                firstTagResolved = true;
            }

            if ( firstTag == null )
            {
                continue;
            }
            else if ( firstTag.equals( "article" ) && supportedFromFormat.equalsIgnoreCase( DOCBOOK_PARSER ) )
            {
                return supportedFromFormat;
            }
            else if ( firstTag.equals( "faqs" ) && supportedFromFormat.equalsIgnoreCase( FML_PARSER ) )
            {
                return supportedFromFormat;
            }
            else if ( firstTag.equals( "document" ) && supportedFromFormat.equalsIgnoreCase( XDOC_PARSER ) )
            {
                return supportedFromFormat;
            }
            else if ( firstTag.equals( "html" ) && supportedFromFormat.equalsIgnoreCase( XHTML_PARSER ) )
            {
                return supportedFromFormat;
            }
        }

        StringBuffer msg = new StringBuffer();
        msg.append( "Could not detect the Doxia format for file: " );
        msg.append( f.getAbsolutePath() );
        msg.append( "\n Specify explicitly the Doxia format." );
        throw new UnsupportedOperationException( msg.toString() );
    }

    /**
     * Checks if the name of the file ends with <code>.&lt;format&gt;</code>, ignoring the case.
     *
     * @param f not null
     * @param format could be null
     * @return <code>true</code> if the file name computes the format, <code>false</code> otherwise
     * or if format is <code>null</code>.
     * @throws IllegalArgumentException if f is <code>null</code>.
     */
    private static boolean isDoxiaFileName( File f, String format )
    {
        if ( f == null )
        {
            throw new IllegalArgumentException( "f is required." );
        }

        if ( format == null )
        {
            // no format, no match
            return false;
        }

        // plain suffix comparison: the format is taken literally, not as a regular expression
        String fileName = f.getName().toLowerCase( Locale.ENGLISH );
        return fileName.endsWith( "." + format.toLowerCase( Locale.ENGLISH ) );
    }

    /**
     * Reads the given file as XML up to its first start tag.
     *
     * @param xmlFile not null and should be a file.
     * @return the first tag name if found, <code>null</code> in other case, i.e. if the file could not
     * be read, is not parseable as XML or contains no tag.
     * @throws IllegalArgumentException if xmlFile is <code>null</code> or not a file.
     */
    private static String getFirstTag( File xmlFile )
    {
        if ( xmlFile == null )
        {
            throw new IllegalArgumentException( "xmlFile is required." );
        }
        if ( !xmlFile.isFile() )
        {
            throw new IllegalArgumentException( "The file '" + xmlFile.getAbsolutePath() + "' is not a file." );
        }

        Reader reader = null;
        try
        {
            reader = ReaderFactory.newXmlReader( xmlFile );
            XmlPullParser parser = new MXParser();
            parser.setInput( reader );

            int eventType = parser.getEventType();
            while ( eventType != XmlPullParser.END_DOCUMENT )
            {
                if ( eventType == XmlPullParser.START_TAG )
                {
                    return parser.getName();
                }
                eventType = parser.nextToken();
            }
        }
        catch ( XmlPullParserException e )
        {
            // not an XML file: this is an expected outcome of the detection, not an error
            return null;
        }
        catch ( IOException e )
        {
            // includes FileNotFoundException; an unreadable file simply has no first tag
            return null;
        }
        finally
        {
            IOUtil.close( reader );
        }

        return null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy