
org.enhydra.xml.xmlc.compiler.Parse Maven / Gradle / Ivy
The newest version!
/*
* Enhydra Java Application Server Project
*
* The contents of this file are subject to the Enhydra Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License on
* the Enhydra web site ( http://www.enhydra.org/ ).
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific terms governing rights and limitations
* under the License.
*
* The Initial Developer of the Enhydra Application Server is Lutris
* Technologies, Inc. The Enhydra Application Server and portions created
* by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
* All Rights Reserved.
*
* Contributor(s):
*
* $Id: Parse.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
*/
package org.enhydra.xml.xmlc.compiler;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import org.enhydra.xml.io.ErrorReporter;
import org.enhydra.xml.io.InputSourceOps;
import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.enhydra.xml.xmlc.dom.XMLCDomFactoryCache;
import org.enhydra.xml.xmlc.html.parsers.swing.SwingHTMLParser;
import org.enhydra.xml.xmlc.html.parsers.tidy.TidyHTMLParser;
import org.enhydra.xml.xmlc.metadata.CompileOptions;
import org.enhydra.xml.xmlc.metadata.DocumentClass;
import org.enhydra.xml.xmlc.metadata.DocumentFormat;
import org.enhydra.xml.xmlc.metadata.InputDocument;
import org.enhydra.xml.xmlc.metadata.MetaData;
import org.enhydra.xml.xmlc.metadata.Parser;
import org.enhydra.xml.xmlc.metadata.ParserType;
import org.enhydra.xml.xmlc.misc.LineNumberMap;
import org.enhydra.xml.xmlc.misc.SSIReader;
import org.enhydra.xml.xmlc.parsers.ParseTracer;
import org.enhydra.xml.xmlc.parsers.XMLCParser;
import org.enhydra.xml.xmlc.parsers.xerces.XercesParser;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Parses an XML or HTML document into a DOM.
 *
 * <p>The concrete parser (Swing, Tidy or Xerces) is taken from the document
 * metadata; when the metadata does not name one, a default is chosen based
 * on whether the document is HTML or XML.
 */
public class Parse {
    /**
     * Parser selected by the most recent call to {@code setupParser}.
     */
    private XMLCParser fXMLCParser;

    /**
     * Receives errors and warnings produced while parsing.
     */
    private final ErrorReporter fErrorReporter;

    /**
     * Verbose output stream. The code tolerates <code>null</code>, in which
     * case no verbose or trace output is written.
     */
    private final PrintWriter fVerboseOut;

    /**
     * Print verbose messages. Only ever true when a verbose stream is
     * available and the compile options request verbose output.
     */
    private boolean fVerbose;

    /**
     * Construct a new file parser.
     *
     * @param errorReporter Object used to handle errors.
     * @param verboseOut Output stream for verbose and trace information.
     */
    public Parse(ErrorReporter errorReporter,
                 PrintWriter verboseOut) {
        fErrorReporter = errorReporter;
        fVerboseOut = verboseOut;
    }

    /**
     * Is this the type of an HTML parser?
     *
     * @param parser Parser type to check.
     * @return true for the Swing and Tidy parsers, false otherwise.
     */
    private boolean isHtmlParser(ParserType parser) {
        return (parser == ParserType.SWING)
            || (parser == ParserType.TIDY);
    }

    /**
     * Determine the parser to use and store it in {@code fXMLCParser}.
     *
     * @param metaData Document metadata, used to describe the input in
     *        error messages.
     * @param parser Requested parser type, or null to use the default for
     *        the document type (Tidy for HTML, Xerces for XML).
     * @param isHtmlDocument true if the document is HTML, false if XML.
     * @throws XMLCException If the parser does not support the document
     *         type.
     */
    private void setupParser(MetaData metaData,
                             ParserType parser,
                             boolean isHtmlDocument)
        throws XMLCException, IOException {
        if (parser == null) {
            // Use default parser based on document type.
            parser = isHtmlDocument ? ParserType.TIDY : ParserType.XERCES;
        }

        // Check for conflicts with parser and document type.
        if (isHtmlDocument) {
            if (!isHtmlParser(parser)) {
                throw new XMLCException("Document appears to be an HTML document; the "
                                        + parser + " parser only supports XML"
                                        + " (does the document start with `<?xml ...?>'?): "
                                        + getInputSourceDesc(metaData));
            }
        } else {
            if (isHtmlParser(parser)) {
                throw new XMLCException("Document appears to be an XML document; the "
                                        + parser + " parser only supports HTML: "
                                        + getInputSourceDesc(metaData));
            }
            if (metaData.getHTMLSection() != null) {
                //FIXME: need to include check for options being defaulted.
                //throw new XMLCException("HTML options may not be specified for a XML document");
            }
        }

        // Load and initialize parser.
        if (parser == ParserType.SWING) {
            fXMLCParser = new SwingHTMLParser();
        } else if (parser == ParserType.TIDY) {
            fXMLCParser = new TidyHTMLParser();
        } else if (parser == ParserType.XERCES) {
            fXMLCParser = new XercesParser();
        } else {
            throw new XMLCError("Unknown parser \"" + parser + "\"");
        }
    }

    /**
     * Determine if this is an XML or HTML document. It is either explicitly
     * specified in the metadata or must be determined by looking at the
     * input.
     *
     * @param metaData Document metadata.
     * @return true if the document is XML, false if it is HTML.
     */
    private boolean isXMLDocument(MetaData metaData) throws IOException {
        InputDocument inputDoc = metaData.getInputDocument();
        DocumentFormat docFormat = inputDoc.getDocumentFormat();
        if (docFormat == DocumentFormat.XML) {
            return true;
        }
        if (docFormat == DocumentFormat.HTML) {
            return false;
        }
        // Format not given explicitly; inspect the input itself.
        return InputSourceOps.isXMLDocument(inputDoc.getInputSource());
    }

    /**
     * Get the input source, handling SSI filtering.
     *
     * @param metaData Document metadata.
     * @return The document's input source, wrapped by an SSI reader when
     *         SSI processing is enabled for the input document.
     */
    private InputSource getInputSource(MetaData metaData) throws IOException {
        InputDocument inputDoc = metaData.getInputDocument();
        InputSource inputSource = inputDoc.getInputSource();
        if (inputDoc.getProcessSSI()) {
            return SSIReader.create(inputSource, inputDoc.getSSIBase());
        }
        return inputSource;
    }

    /**
     * Get a description of the input source for error messages.
     *
     * @param metaData Document metadata.
     * @return Name of the document's input source.
     */
    private String getInputSourceDesc(MetaData metaData) {
        InputDocument inputDoc = metaData.getInputDocument();
        return InputSourceOps.getName(inputDoc.getInputSource());
    }

    /**
     * Get the line number map to pass to the parser, or null
     * if one is not in use.
     *
     * @param input Input source that will be parsed.
     * @return The map supplied by the SSI reader when the character stream
     *         is an {@code SSIReader}, otherwise null.
     */
    private LineNumberMap getLineNumberMap(InputSource input) {
        Reader reader = input.getCharacterStream();
        if (reader instanceof SSIReader) {
            return ((SSIReader)reader).getLineNumberMap();
        }
        return null;
    }

    /**
     * Parse the page into the DOM and normalize its text nodes.
     *
     * @param metaData Document metadata.
     * @return The parsed document.
     * @throws XMLCException If the parser does not match the document type,
     *         the parse fails, or the error reporter has a non-zero error
     *         count after parsing.
     * @throws IOException If reading the input fails.
     */
    public XMLCDocument parse(MetaData metaData)
        throws XMLCException, IOException {
        Parser parser = metaData.getParser();
        CompileOptions compileOptions = metaData.getCompileOptions();
        DocumentClass documentClass = metaData.getDocumentClass();

        // Setup tracing. Verbose output is only enabled if a stream is
        // available and it is requested.
        fVerbose = (fVerboseOut != null) && compileOptions.getVerbose();
        boolean printParseInfo
            = (compileOptions.getPrintParseInfo() && (fVerboseOut != null));
        ParseTracer traceOut = new ParseTracer(printParseInfo ? fVerboseOut : null);

        InputSource inputSource = getInputSource(metaData);
        LineNumberMap lineNumberMap = getLineNumberMap(inputSource);
        boolean isHtmlDocument = !isXMLDocument(metaData);

        XMLCDomFactory domFactory
            = XMLCDomFactoryCache.createFactory(documentClass.getDomFactoryClass(isHtmlDocument),
                                                isHtmlDocument);
        if (fVerbose) {
            fVerboseOut.println(">>> using DOM Factory class: " + domFactory.getClass().getName());
        }
        XMLCDomFactoryCache.checkForOutdatedClass(domFactory);

        setupParser(metaData, parser.getName(), isHtmlDocument);

        // Apply the parser's warning setting for the duration of the parse
        // only; the reporter's previous setting is restored afterwards.
        boolean saveWarnings = fErrorReporter.getPrintWarnings();
        fErrorReporter.setPrintWarnings(parser.getWarnings());
        XMLCDocument xmlcDoc;
        try {
            xmlcDoc = fXMLCParser.parse(inputSource,
                                        lineNumberMap,
                                        domFactory,
                                        metaData,
                                        fErrorReporter,
                                        traceOut);
        } catch (SAXException except) {
            // Prefer the exception wrapped by the SAXException, if any.
            Exception useExcept = except.getException();
            if (useExcept == null) {
                useExcept = except;
            }
            // Describe the input the same way as the other error messages in
            // this class; InputSource.getSystemId() may be null when the
            // source was not given a system identifier.
            throw new XMLCException("Parse of \"" + getInputSourceDesc(metaData)
                                    + "\" failed: " + useExcept, useExcept);
        } finally {
            fErrorReporter.setPrintWarnings(saveWarnings);
        }

        int cnt = fErrorReporter.getErrorCnt();
        if (cnt > 0) {
            throw new XMLCException(cnt + " error" + ((cnt == 1) ? "" : "s")
                                    + " parsing document");
        }

        // Normalize the text nodes.
        Element root = xmlcDoc.getDocument().getDocumentElement();
        if (root != null) {
            root.normalize();
        }
        return xmlcDoc;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy