All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.xml.xmlc.compiler.Parse Maven / Gradle / Ivy

The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: Parse.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
 */

package org.enhydra.xml.xmlc.compiler;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;

import org.enhydra.xml.io.ErrorReporter;
import org.enhydra.xml.io.InputSourceOps;
import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.enhydra.xml.xmlc.dom.XMLCDomFactoryCache;
import org.enhydra.xml.xmlc.html.parsers.swing.SwingHTMLParser;
import org.enhydra.xml.xmlc.html.parsers.tidy.TidyHTMLParser;
import org.enhydra.xml.xmlc.metadata.CompileOptions;
import org.enhydra.xml.xmlc.metadata.DocumentClass;
import org.enhydra.xml.xmlc.metadata.DocumentFormat;
import org.enhydra.xml.xmlc.metadata.InputDocument;
import org.enhydra.xml.xmlc.metadata.MetaData;
import org.enhydra.xml.xmlc.metadata.Parser;
import org.enhydra.xml.xmlc.metadata.ParserType;
import org.enhydra.xml.xmlc.misc.LineNumberMap;
import org.enhydra.xml.xmlc.misc.SSIReader;
import org.enhydra.xml.xmlc.parsers.ParseTracer;
import org.enhydra.xml.xmlc.parsers.XMLCParser;
import org.enhydra.xml.xmlc.parsers.xerces.XercesParser;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;


/**
 * Parse a XML or HTML document into a DOM.
 */
public class Parse {
    /**
     * XML parser object.
     */
    private XMLCParser fXMLCParser;

    /**
     * Error output.
     */
    private ErrorReporter fErrorReporter;

    /**
     * Verbose output stream.
     */
    private PrintWriter fVerboseOut;

    /**
     * Print verbose messages.
     */
    private boolean fVerbose;

    /**
     * Construct a new file parser.
     *
     * @param errorReporter Object used to handle errors.
     * @param verboseOut Output stream for verbose and trace information.
     */
    public Parse(ErrorReporter errorReporter,
                 PrintWriter verboseOut) {
        fErrorReporter = errorReporter;
        fVerboseOut = verboseOut;
    }

    /*
     * Is this an the name of an HTML parser?
     */
    private boolean isHtmlParser(ParserType parser) {
        return (parser == ParserType.SWING)
            || (parser == ParserType.TIDY);
    }

    /**
     * Determine the parser to use.
     */
    private void setupParser(MetaData metaData,
                             ParserType parser,
                             boolean isHtmlDocument)
        throws XMLCException, IOException {
        if (parser == null) {
            // Use default parser based on document type.
            if (isHtmlDocument) {
                parser = ParserType.TIDY;
            } else {
                parser = ParserType.XERCES;
            }
        }
        
        // Check for conflicts with parser and document type.
        if (isHtmlDocument) {
            if (!isHtmlParser(parser)) {
                throw new XMLCException("Document appears to be an HTML document; the "
                                        + parser + " parser only supports XML"
                                        + " (does the document start with `'?): "
                                        + getInputSourceDesc(metaData));
            }
        } else {
            if (isHtmlParser(parser)) {
                throw new XMLCException("Document appears to be an XML document; the "
                                        + parser + " parser only supports HTML: "
                                        + getInputSourceDesc(metaData));
            }
            if (metaData.getHTMLSection() != null) {
                //FIXME: need to include check for options being defaulted.
                //throw new XMLCException("HTML options may not be specified for a XML document");
            }
        }

        // Load and initialize parser.
        if (parser == ParserType.SWING) {
            fXMLCParser = new SwingHTMLParser();
        } else if (parser == ParserType.TIDY) {
            fXMLCParser = new TidyHTMLParser(); 
        } else if (parser == ParserType.XERCES) {
            fXMLCParser = new XercesParser();
        } else {
            throw new XMLCError("Unknown parser \"" + parser + "\"");
        }
    }

    /**
     * Determine if this is an XML or HTML document.  Its either explictly
     * specified or must be determined by looking at the file.
     */
    private boolean isXMLDocument(MetaData metaData) throws IOException {
        InputDocument inputDoc = metaData.getInputDocument();
        DocumentFormat docFormat = inputDoc.getDocumentFormat();
        
        if (docFormat == DocumentFormat.XML) {
            return true;
        } else if (docFormat == DocumentFormat.HTML) {
            return false;
        } else {
            return InputSourceOps.isXMLDocument(inputDoc.getInputSource());
        }
    }

    /**
     * Get the input source, handling SSI filtering.
     */
    private InputSource getInputSource(MetaData metaData) throws IOException {
        InputDocument inputDoc = metaData.getInputDocument();
        InputSource inputSource = inputDoc.getInputSource();
        if (inputDoc.getProcessSSI()) {
            // dbr_20020128.1_start
            //return SSIReader.create(inputSource);
            return SSIReader.create(inputSource, inputDoc.getSSIBase());
            // dbr_20020128.1_end
        } else {
            return inputSource;
        }
    }

    /** Get a description of the input source for error messages */
    private String getInputSourceDesc(MetaData metaData) {
        InputDocument inputDoc = metaData.getInputDocument();
        return InputSourceOps.getName(inputDoc.getInputSource());
    }
    
    /**
     * Get the line number map to pass to the parse, or null
     * if one is not in uses.
     */
    private LineNumberMap getLineNumberMap(InputSource input) {
        Reader reader = input.getCharacterStream();
        if (reader instanceof SSIReader) {
            return ((SSIReader)reader).getLineNumberMap();
        } else {
            return null;
        }
    }

    /*
     * Parse the page into the DOM and perform various checks and edits.
     *
     * @param metaData Document metadata.
     * @param verboseOut Write verbose and trace information output stream.
     */
    public XMLCDocument parse(MetaData metaData)
        throws XMLCException, IOException {

        Parser parser = metaData.getParser();
        CompileOptions compileOptions = metaData.getCompileOptions();
        DocumentClass documentClass = metaData.getDocumentClass();

        // Setup tracing
        if (fVerboseOut != null) {
            // Only enable verbose output if a stream is available
            // and its requested.
            fVerbose = compileOptions.getVerbose();
        }
        boolean printParseInfo
            = (compileOptions.getPrintParseInfo() && (fVerboseOut != null));

        ParseTracer traceOut = new ParseTracer(printParseInfo ? fVerboseOut : null);

        InputSource inputSource = getInputSource(metaData);
        LineNumberMap lineNumberMap = getLineNumberMap(inputSource);

        boolean isHtmlDocument = !isXMLDocument(metaData);
        XMLCDomFactory domFactory
            = XMLCDomFactoryCache.createFactory(documentClass.getDomFactoryClass(isHtmlDocument),
                                                isHtmlDocument);
        if (fVerbose) {
            fVerboseOut.println(">>> using DOM Factory class: " + domFactory.getClass().getName());
        }
        XMLCDomFactoryCache.checkForOutdatedClass(domFactory);

        setupParser(metaData, parser.getName(), isHtmlDocument);

        boolean saveWarnings = fErrorReporter.getPrintWarnings();
        fErrorReporter.setPrintWarnings(parser.getWarnings());

        XMLCDocument xmlcDoc;
        try {
            xmlcDoc = fXMLCParser.parse(inputSource, 
                                        lineNumberMap,
                                        domFactory,
                                        metaData,
                                        fErrorReporter,
                                        traceOut);
        } catch (SAXException except) {
            Exception useExcept = except.getException();
            if (useExcept == null) {
                useExcept = except;
            }
            throw new XMLCException("Parse of \"" + inputSource.getSystemId()
                                    + "\" failed: " + useExcept, useExcept);
        } finally {
            fErrorReporter.setPrintWarnings(saveWarnings);
        }
        int cnt = fErrorReporter.getErrorCnt();
        if (cnt > 0) {
            throw new XMLCException(cnt + " error" + ((cnt == 1) ? "" : "s")
                                    + " parsing document");
        }

        // Normalize the text nodes.
        Element root = xmlcDoc.getDocument().getDocumentElement();
        if (root != null) {
            root.normalize();
        }

        return xmlcDoc;
    }    
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy