// com.openhtmltopdf.resource.XMLResource Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of openhtmltopdf-core Show documentation
// Show all versions of openhtmltopdf-core Show documentation
// Open HTML to PDF is a CSS 2.1 renderer written in Java. This artifact contains the core rendering and layout code.
/*
* {{{ header & license
* Copyright (c) 2004, 2005 Who?
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* }}}
*/
package com.openhtmltopdf.resource;
import java.io.InputStream;
import java.io.Reader;
import java.util.logging.Level;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import com.openhtmltopdf.util.Configuration;
import com.openhtmltopdf.util.ThreadCtx;
import com.openhtmltopdf.util.XRLog;
import com.openhtmltopdf.util.XRRuntimeException;
/**
 * A resource holding an XML document that has been parsed into a W3C DOM {@link Document}.
 *
 * <p>Instances are created only through the static {@code load(...)} factory methods. Each of
 * them hands the input to a shared builder, which reads it with a SAX {@link XMLReader} and
 * copies the result into a DOM tree using a stylesheet-less TrAX {@link Transformer}.</p>
 *
 * <p>The SAX reader, the {@link DocumentBuilderFactory} and the {@link TransformerFactory} are
 * all configured to refuse external entities and external DTDs, as a guard against XXE.</p>
 *
 * @author Patrick Wright
 */
public class XMLResource extends AbstractResource {
    /** The parsed DOM; stays {@code null} until the builder calls {@link #setDocument(Document)}. */
    private Document document;

    /** Shared builder used by every {@code load(...)} call; it declares no fields of its own. */
    private static final XMLResourceBuilder XML_RESOURCE_BUILDER;

    /**
     * Cleared the first time the XMLReader class named in the configuration cannot be found by
     * {@code Class.forName}, so that the lookup (and its warning) is not repeated on every load.
     * NOTE(review): plain static, read and written without any synchronization.
     */
    private static boolean useConfiguredParser;

    static {
        XML_RESOURCE_BUILDER = new XMLResourceBuilder();
        useConfiguredParser = true;
    }

    private XMLResource(InputStream stream) {
        super(stream);
    }

    private XMLResource(InputSource source) {
        super(source);
    }

    /**
     * Parses the XML read from {@code stream} into a new resource.
     *
     * @param stream the XML input
     * @return a resource whose {@link #getDocument()} holds the parsed DOM
     * @throws XRRuntimeException if no parser can be created or the input cannot be loaded
     */
    public static XMLResource load(InputStream stream) {
        return XML_RESOURCE_BUILDER.createXMLResource(new XMLResource(stream));
    }

    /**
     * Parses the XML described by {@code source} into a new resource.
     *
     * @param source the SAX input source
     * @return a resource whose {@link #getDocument()} holds the parsed DOM
     * @throws XRRuntimeException if no parser can be created or the input cannot be loaded
     */
    public static XMLResource load(InputSource source) {
        return XML_RESOURCE_BUILDER.createXMLResource(new XMLResource(source));
    }

    /**
     * Parses the XML read from {@code reader} into a new resource. The reader is wrapped in an
     * {@link InputSource} before parsing.
     *
     * @param reader the character stream containing the XML
     * @return a resource whose {@link #getDocument()} holds the parsed DOM
     * @throws XRRuntimeException if no parser can be created or the input cannot be loaded
     */
    public static XMLResource load(Reader reader) {
        return XML_RESOURCE_BUILDER.createXMLResource(new XMLResource(new InputSource(reader)));
    }

    /**
     * Copies an arbitrary TrAX {@link Source} into a DOM. Unlike the other {@code load}
     * overloads, this path does not install the hardened SAX reader created by
     * {@link #newXMLReader()}; only the document builder and transformer factories are hardened.
     *
     * @param source the TrAX source to transform into a DOM
     * @return a resource whose {@link #getDocument()} holds the resulting DOM
     * @throws XRRuntimeException if the transformer cannot be configured or the transform fails
     */
    @Deprecated
    public static XMLResource load(Source source) {
        return XML_RESOURCE_BUILDER.createXMLResource(source);
    }

    /**
     * @return the parsed DOM document, or {@code null} if none has been set yet
     */
    public Document getDocument() {
        return document;
    }

    /*package*/
    void setDocument(Document document) {
        this.document = document;
    }

    /**
     * Creates a SAX {@link XMLReader}, trying three sources in order and using the first that
     * yields a reader:
     * <ol>
     * <li>the class named by the configuration key {@code xr.load.xml-reader}, unless that value
     * is absent, equals {@code "default"} (ignoring case), or an earlier call already failed to
     * find the class;</li>
     * <li>{@link XMLReaderFactory#createXMLReader()};</li>
     * <li>{@code SAXParserFactory.newInstance().newSAXParser().getXMLReader()}.</li>
     * </ol>
     * The returned reader has no security features applied by this method.
     *
     * @return a new reader, never {@code null}
     * @throws XRRuntimeException if none of the three sources produced a reader
     */
    public static final XMLReader newXMLReader() {
        XMLReader xmlReader = null;
        String xmlReaderClass = Configuration.valueFor("xr.load.xml-reader");

        // Step 1: the reader class requested in the configuration, if any.
        try {
            if (xmlReaderClass != null &&
                    !xmlReaderClass.equalsIgnoreCase("default") &&
                    XMLResource.useConfiguredParser) {
                try {
                    Class.forName(xmlReaderClass);
                } catch (Exception ex) {
                    // Remember the failure so later loads go straight to the defaults.
                    XMLResource.useConfiguredParser = false;
                    XRLog.load(Level.WARNING,
                            "The XMLReader class you specified as a configuration property " +
                            "could not be found. Class.forName() failed on "
                            + xmlReaderClass + ". Please check classpath. Use value 'default' in " +
                            "FS configuration if necessary. Will now try JDK default.");
                }
                if (XMLResource.useConfiguredParser) {
                    xmlReader = XMLReaderFactory.createXMLReader(xmlReaderClass);
                }
            }
        } catch (Exception ex) {
            XRLog.load(Level.WARNING,
                    "Could not instantiate custom XMLReader class for XML parsing: "
                    + xmlReaderClass + ". Please check classpath. Use value 'default' in " +
                    "FS configuration if necessary. Will now try JDK default.", ex);
        }

        // Step 2: whatever XMLReaderFactory resolves on its own (JDK default).
        if (xmlReader == null) {
            try {
                xmlReader = XMLReaderFactory.createXMLReader();
            } catch (Exception ex) {
                XRLog.general(ex.getMessage());
            }
        }

        // Step 3: last resort, obtain the reader through JAXP's SAXParserFactory.
        if (xmlReader == null) {
            try {
                XRLog.load(Level.WARNING, "falling back on the default parser");
                SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
                xmlReader = parser.getXMLReader();
            } catch (Exception ex) {
                XRLog.general(ex.getMessage());
            }
        }

        if (xmlReader == null) {
            // NOTE(review): the message stops after "which is set to: "; appending the value of
            // the org.xml.sax.driver system property was left disabled in the original code
            // pending a check of whether it is meaningful.
            throw new XRRuntimeException("Could not instantiate any SAX 2 parser, including JDK default. " +
                    "The name of the class to use should have been read from the org.xml.sax.driver System " +
                    "property, which is set to: ");
        }
        XRLog.load("SAX XMLReader in use (parser): " + xmlReader.getClass().getName());
        return xmlReader;
    }

    /**
     * Turns an input into a DOM {@link Document}: configures the SAX reader and the JAXP
     * factories, then runs a stylesheet-less transform into a {@link DOMResult}.
     */
    private static class XMLResourceBuilder {
        /**
         * Applies the XXE-related hardening features to the SAX reader.
         *
         * <p>Each feature is applied on its own so that a parser rejecting one of them (for
         * example a vendor-specific URI) still receives all the others.</p>
         *
         * @param xmlReader the reader that will parse untrusted input
         */
        private void setXmlReaderSecurityFeatures(XMLReader xmlReader) {
            // VERY IMPORTANT: Without these lines, users can pull in arbitrary files from the system using XXE.
            // DO NOT REMOVE!
            // NOTE(review): DOCTYPE declarations stay allowed (disallow-doctype-decl = false),
            // presumably because XHTML input commonly carries a DOCTYPE; confirm before changing.
            setXmlReaderSecurityFeature(xmlReader, "http://apache.org/xml/features/disallow-doctype-decl", false);
            setXmlReaderSecurityFeature(xmlReader, "http://xml.org/sax/features/external-general-entities", false);
            setXmlReaderSecurityFeature(xmlReader, "http://xml.org/sax/features/external-parameter-entities", false);
            // Must be false: loading the external DTD is exactly what the hardening is meant to prevent,
            // and it matches the DocumentBuilderFactory configuration below.
            setXmlReaderSecurityFeature(xmlReader, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            setXmlReaderSecurityFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        }

        /** Sets one hardening feature on the reader, logging at SEVERE if the reader rejects it. */
        private void setXmlReaderSecurityFeature(XMLReader xmlReader, String feature, boolean value) {
            try {
                xmlReader.setFeature(feature, value);
            } catch (SAXNotSupportedException e) {
                XRLog.load(Level.SEVERE, "Unable to disable XML External Entities, which might put you at risk to XXE attacks", e);
            } catch (SAXNotRecognizedException e) {
                XRLog.load(Level.SEVERE, "Unable to disable XML External Entities, which might put you at risk to XXE attacks", e);
            }
        }

        /**
         * Applies the XXE-related hardening features to the document builder factory, one at a
         * time so that a rejected feature does not prevent the remaining ones from being set.
         *
         * @param dbf the factory used to create the empty target document
         */
        private void setDocumentBuilderSecurityFeatures(DocumentBuilderFactory dbf) {
            // VERY IMPORTANT: Without these lines, users can pull in arbitrary files from the system using XXE.
            // DO NOT REMOVE!
            setDocumentBuilderSecurityFeature(dbf, "http://apache.org/xml/features/disallow-doctype-decl", false);
            setDocumentBuilderSecurityFeature(dbf, "http://xml.org/sax/features/external-general-entities", false);
            setDocumentBuilderSecurityFeature(dbf, "http://xml.org/sax/features/external-parameter-entities", false);
            setDocumentBuilderSecurityFeature(dbf, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            setDocumentBuilderSecurityFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        }

        /** Sets one hardening feature on the factory, logging at SEVERE if the factory rejects it. */
        private void setDocumentBuilderSecurityFeature(DocumentBuilderFactory dbf, String feature, boolean value) {
            try {
                dbf.setFeature(feature, value);
            } catch (ParserConfigurationException e) {
                XRLog.load(Level.SEVERE, "Unable to disable XML External Entities, which might put you at risk to XXE attacks", e);
            }
        }

        /**
         * Forbids the transformer factory from fetching external DTDs and external stylesheets
         * by setting both access properties to the empty string. Each property is set on its
         * own so that an implementation rejecting one still receives the other.
         *
         * @param xformFactory the factory that creates the SAX-to-DOM transformer
         */
        private void setTranformerFactorySecurityFeatures(TransformerFactory xformFactory) {
            denyTransformerFactoryAccess(xformFactory, XMLConstants.ACCESS_EXTERNAL_DTD);
            denyTransformerFactoryAccess(xformFactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET);
        }

        /** Sets one access-restriction attribute to "", logging at SEVERE if it is not supported. */
        private void denyTransformerFactoryAccess(TransformerFactory xformFactory, String attribute) {
            try {
                xformFactory.setAttribute(attribute, "");
            } catch (IllegalArgumentException e) {
                XRLog.load(Level.SEVERE, "Unable to disable XML External Entities, which might put you at risk to XXE attacks", e);
            }
        }

        /**
         * Instantiates the named transformer factory, falling back to the platform default
         * (with a SEVERE log entry) when the named implementation cannot be loaded.
         *
         * @param preferredImpl fully qualified class name of the preferred factory
         * @return the preferred factory, or the default one if loading failed
         */
        private TransformerFactory loadPreferredTransformerFactory(String preferredImpl) {
            try {
                return TransformerFactory.newInstance(preferredImpl, null);
            } catch (TransformerFactoryConfigurationError e) {
                XRLog.load(Level.SEVERE, "Could not load preferred XML transformer, using default which may not be secure.");
                return TransformerFactory.newInstance();
            }
        }

        /**
         * Instantiates the named document builder factory, or the platform default when
         * {@code preferredImpl} is {@code null} or cannot be loaded (the latter is logged at
         * SEVERE).
         *
         * @param preferredImpl fully qualified class name of the preferred factory, may be {@code null}
         * @return the preferred factory, or the default one
         */
        private DocumentBuilderFactory loadPreferredDocumentBuilderFactory(String preferredImpl) {
            try {
                return preferredImpl == null ? DocumentBuilderFactory.newInstance() : DocumentBuilderFactory.newInstance(preferredImpl, null);
            } catch (FactoryConfigurationError e) {
                XRLog.load(Level.SEVERE, "Could not load preferred XML document builder, using default which may not be secure.");
                return DocumentBuilderFactory.newInstance();
            }
        }

        /**
         * Parses the input held by {@code target} and stores the resulting DOM and the elapsed
         * load time on it.
         *
         * @param target the resource supplying the input source and receiving the document
         * @return {@code target}, now populated
         * @throws XRRuntimeException if the reader or transformer cannot be set up, or the
         *         transform itself fails
         */
        private XMLResource createXMLResource(XMLResource target) {
            XMLReader xmlReader = XMLResource.newXMLReader();
            setXmlReaderSecurityFeatures(xmlReader);
            addHandlers(xmlReader);
            setParserFeatures(xmlReader);

            // The timer starts only after the reader is set up, so reader creation is not counted.
            long st = System.currentTimeMillis();

            Source input;
            DOMResult output;
            Transformer idTransform;
            try {
                input = new SAXSource(xmlReader, target.getResourceInputSource());

                String preferredDocumentBuilderFactory = ThreadCtx.get().sharedContext()._preferredDocumentBuilderFactoryImplementationClass;
                DocumentBuilderFactory dbf = loadPreferredDocumentBuilderFactory(preferredDocumentBuilderFactory);
                setDocumentBuilderSecurityFeatures(dbf);
                dbf.setNamespaceAware(true);
                dbf.setValidating(false); // validation is the root of all evil in xml - tobe
                output = new DOMResult(dbf.newDocumentBuilder().newDocument());

                String preferredTransformerFactory = ThreadCtx.get().sharedContext()._preferredTransformerFactoryImplementationClass;
                TransformerFactory xformFactory;
                if (preferredTransformerFactory == null) {
                    xformFactory = TransformerFactory.newInstance();
                } else {
                    xformFactory = loadPreferredTransformerFactory(preferredTransformerFactory);
                }
                setTranformerFactorySecurityFeatures(xformFactory);
                // No stylesheet is supplied, so the transformer just copies source to result.
                idTransform = xformFactory.newTransformer();
            } catch (Exception ex) {
                throw new XRRuntimeException(
                        "Failed on configuring SAX to DOM transformer.", ex);
            }

            try {
                idTransform.transform(input, output);
            } catch (Exception ex) {
                throw new XRRuntimeException(
                        "Can't load the XML resource (using TRaX transformer). " + ex.getMessage(), ex);
            }

            long end = System.currentTimeMillis();
            target.setElapsedLoadTime(end - st);
            XRLog.load("Loaded document in ~" + target.getElapsedLoadTime() + "ms");
            target.setDocument((Document) output.getNode());
            return target;
        }

        /**
         * Adds the default EntityResolver and ErrorHandler for the SAX parser. The error handler
         * only logs the message of every warning, error and fatal error; it never rethrows.
         */
        private void addHandlers(XMLReader xmlReader) {
            try {
                // add our own entity resolver
                xmlReader.setEntityResolver(FSEntityResolver.instance());
                xmlReader.setErrorHandler(new ErrorHandler() {
                    public void error(SAXParseException ex) {
                        XRLog.load(ex.getMessage());
                    }

                    public void fatalError(SAXParseException ex) {
                        XRLog.load(ex.getMessage());
                    }

                    public void warning(SAXParseException ex) {
                        XRLog.load(ex.getMessage());
                    }
                });
            } catch (Exception ex) {
                throw new XRRuntimeException("Failed on configuring SAX parser/XMLReader.", ex);
            }
        }

        /**
         * Sets all standard features for SAX parser, using values from Configuration.
         *
         * <p>Validation is first switched off and namespace processing switched on
         * unconditionally. Then, unless
         * {@code Configuration.isFalse("xr.load.configure-features", false)} reports true, the
         * validation, string-interning, namespaces and namespace-prefixes features are set from
         * their {@code xr.load.*} configuration keys.</p>
         */
        private void setParserFeatures(XMLReader xmlReader) {
            try {
                // perf: validation off
                xmlReader.setFeature("http://xml.org/sax/features/validation", false);
                // perf: namespaces
                xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
            } catch (SAXException s) {
                // nothing to do--some parsers will not allow setting features
                XRLog.load(Level.WARNING, "Could not set validation/namespace features for XML parser," +
                        "exception thrown.", s);
            }
            if (Configuration.isFalse("xr.load.configure-features", false)) {
                XRLog.load(Level.FINE, "SAX Parser: by request, not changing any parser features.");
                return;
            }

            // perf: validation off
            setFeature(xmlReader, "http://xml.org/sax/features/validation", "xr.load.validation");
            // mem: intern strings
            setFeature(xmlReader, "http://xml.org/sax/features/string-interning", "xr.load.string-interning");
            // perf: namespaces
            setFeature(xmlReader, "http://xml.org/sax/features/namespaces", "xr.load.namespaces");
            setFeature(xmlReader, "http://xml.org/sax/features/namespace-prefixes", "xr.load.namespace-prefixes");
        }

        /**
         * Attempts to set requested feature on the parser; logs exception if not supported
         * or not recognized.
         *
         * @param xmlReader the reader to configure
         * @param featureUri the SAX feature URI; must contain a {@code '/'}, since the log line
         *        prints the part from the last slash onwards
         * @param configName the configuration key whose boolean value is applied
         */
        private void setFeature(XMLReader xmlReader, String featureUri, String configName) {
            try {
                xmlReader.setFeature(featureUri, Configuration.isTrue(configName, false));

                XRLog.load(Level.FINE, "SAX Parser feature: " +
                        featureUri.substring(featureUri.lastIndexOf("/")) +
                        " set to " +
                        xmlReader.getFeature(featureUri));
            } catch (SAXNotSupportedException ex) {
                XRLog.load(Level.WARNING, "SAX feature not supported on this XMLReader: " + featureUri);
            } catch (SAXNotRecognizedException ex) {
                XRLog.load(Level.WARNING, "SAX feature not recognized on this XMLReader: " + featureUri +
                        ". Feature may be properly named, but not recognized by this parser.");
            }
        }

        /**
         * Copies an arbitrary TrAX {@link Source} into a new DOM document and wraps it in a
         * resource that has no input source of its own.
         *
         * <p>Here a document builder feature that cannot be set aborts the load with an
         * {@link XRRuntimeException} instead of merely being logged.</p>
         *
         * @param source the TrAX source to copy
         * @return a new resource holding the resulting DOM and the elapsed load time
         * @throws XRRuntimeException if configuration or the transform fails
         */
        @Deprecated
        public XMLResource createXMLResource(Source source) {
            long st = System.currentTimeMillis();

            DOMResult output;
            Transformer idTransform;
            try {
                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

                // VERY IMPORTANT: Without these lines, users can pull in arbitrary files from the system using XXE.
                // DO NOT REMOVE!
                dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
                dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
                dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
                dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);

                dbf.setNamespaceAware(true);
                dbf.setValidating(false);//validation is the root of all evil in xml - tobe
                output = new DOMResult(dbf.newDocumentBuilder().newDocument());

                TransformerFactory xformFactory;
                try {
                    // TransformerFactory.newInstance(String, ClassLoader) signals a missing
                    // implementation with TransformerFactoryConfigurationError (an Error), which a
                    // plain catch (Exception) never sees; the helper handles it and falls back.
                    xformFactory = loadPreferredTransformerFactory(
                            "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl");
                } catch (Exception e) {
                    XRLog.load(Level.SEVERE, "Could not load preferred XML transformer, using default which may not be secure.");
                    xformFactory = TransformerFactory.newInstance();
                }

                xformFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
                xformFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");

                // No stylesheet is supplied, so the transformer just copies source to result.
                idTransform = xformFactory.newTransformer();
            } catch (Exception ex) {
                throw new XRRuntimeException("Failed on configuring SAX to DOM transformer.", ex);
            }

            try {
                idTransform.transform(source, output);
            } catch (Exception ex) {
                throw new XRRuntimeException("Can't load the XML resource (using TRaX transformer). " + ex.getMessage(), ex);
            }

            long end = System.currentTimeMillis();

            //HACK: should rather use a default constructor
            XMLResource target = new XMLResource((InputSource) null);
            target.setElapsedLoadTime(end - st);
            XRLog.load("Loaded document in ~" + target.getElapsedLoadTime() + "ms");
            target.setDocument((Document) output.getNode());
            return target;
        }
    }
}
/*
* $Id$
*
* $Log$
* Revision 1.20 2007/05/15 22:01:42 peterbrant
* Remove unused code
*
* Revision 1.19 2006/07/26 18:09:42 pdoubleya
* Clean exception throws.
*
* Revision 1.18 2006/02/02 02:47:36 peterbrant
* Support non-AWT images
*
* Revision 1.17 2005/10/22 00:09:18 peterbrant
* Rollback to 1.15
*
* Revision 1.15 2005/07/02 09:40:24 tobega
* More robust parsing
*
* Revision 1.14 2005/06/26 01:02:21 tobega
* Now checking for SecurityException on System.getProperty
*
* Revision 1.13 2005/06/25 22:16:23 tobega
* Browser now handles both plain text files and images
*
* Revision 1.12 2005/06/15 10:56:14 tobega
* cleaned up a bit of URL mess, centralizing URI-resolution and loading to UserAgentCallback
*
* Revision 1.11 2005/06/13 06:50:16 tobega
* Fixed a bug in table content resolution.
* Various "tweaks" in other stuff.
*
* Revision 1.10 2005/06/01 21:36:41 tobega
* Got image scaling working, and did some refactoring along the way
*
* Revision 1.9 2005/04/20 19:13:18 tobega
* Fixed vertical align. Middle works and all look pretty much like in firefox
*
* Revision 1.8 2005/04/03 21:51:31 joshy
* fixed code that gets the XMLReader on the mac
* added isMacOSX() to GeneralUtil
* added app name and single menu bar to browser
*
* Issue number:
* Obtained from:
* Submitted by:
* Reviewed by:
*
* Revision 1.7 2005/03/28 18:33:03 pdoubleya
* Don't show stack trace if XML can't be loaded.
*
* Revision 1.6 2005/03/22 15:34:23 pdoubleya
* Changed to use XMLReaderFactory, appears to solve namespaces issue (thanks to Elliot Rusty Harold, again!).
*
* Revision 1.5 2005/03/16 19:26:31 pdoubleya
* Fixed to use proper javax.xml.transform instantiation for parser, and only try to load custom parser once, so that you don't get exceptions on each page.
*
* Revision 1.4 2005/02/05 18:09:39 pdoubleya
* Add specific SAX class name if none was specified and if system property is not already set.
*
* Revision 1.3 2005/02/05 17:19:47 pdoubleya
* Refactoring for features support, static factory method for XMLReaders.
*
* Revision 1.2 2005/02/05 11:33:33 pdoubleya
* Added load() to XMLResource, and accept overloaded input: InputSource, stream, URL.
*
* Revision 1.1 2005/02/03 20:39:35 pdoubleya
* Added to CVS.
*
*
*/