// js.dom.w3c.DocumentBuilderImpl - source listing from the js-dom artifact (Maven / Gradle / Ivy).
// All versions of js-dom and its documentation are published alongside the artifact.
package js.dom.w3c;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.charset.Charset;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.validation.Schema;
import js.dom.Document;
import js.dom.DocumentBuilder;
import js.lang.BugError;
import js.log.Log;
import js.log.LogFactory;
import org.apache.html.dom.HTMLDocumentImpl;
import org.cyberneko.html.parsers.DOMParser;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Document object builder. Supplies factory methods for document creation, parsing from string and loading from various
 * sources: file, input stream, reader, input source and URL. There are different factory methods for XML and HTML
 * documents and all come in two flavors: with or without name space support. For name space support this class follows
 * the W3C DOM naming convention and uses the <code>NS</code> suffix.
 * <p>
 * All loaders use the XML declaration or the HTML meta Content-Type to choose the character encoding; however, loader
 * variants using an input source can force a particular encoding.
 *
 * @author Iulian Rotaru
 */
public final class DocumentBuilderImpl implements DocumentBuilder {
/** Logger shared by this builder and its nested error handler. */
private static final Log log = LogFactory.getLog(DocumentBuilderImpl.class);

/** Parser feature identifier controlling name space processing. */
private static final String FEAT_NAMESPACES = "http://xml.org/sax/features/namespaces";

/** Parser feature identifier controlling XML schema validation. */
private static final String FEAT_SCHEMA_VALIDATION = "http://apache.org/xml/features/validation/schema";

/** Parser feature identifier that, when enabled, disallows DOCTYPE declarations. */
private static final String FEAT_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl";
// ----------------------------------------------------
// create empty XML document
/**
 * Create empty XML document, without name space support, having a single root element.
 *
 * @param root name of the root element.
 * @return newly created document.
 */
@Override
public Document createXML(String root) {
  final boolean useNamespace = false;
  return createXML(root, useNamespace);
}
/**
 * Create empty XML document, with name space support, having a single root element.
 *
 * @param root name of the root element.
 * @return newly created document.
 */
@Override
public Document createXMLNS(String root) {
  final boolean useNamespace = true;
  return createXML(root, useNamespace);
}
/**
 * Create a new XML document and append the root element to it. Any exception raised while obtaining the document
 * builder or while building the document is rethrown wrapped into a DOM exception.
 *
 * @param root name of the root element,
 * @param useNamespace true if document builder should be name space aware.
 * @return newly created document.
 */
private static Document createXML(String root, boolean useNamespace) {
  try {
    javax.xml.parsers.DocumentBuilder builder = getDocumentBuilder(null, useNamespace);
    org.w3c.dom.Document w3cDocument = builder.newDocument();
    org.w3c.dom.Element rootElement = w3cDocument.createElement(root);
    w3cDocument.appendChild(rootElement);
    return new DocumentImpl(w3cDocument);
  } catch (Exception cause) {
    throw new DomException(cause);
  }
}
// ----------------------------------------------------
// parse XML document from string source
/**
 * Parse XML document, without name space support, from string source. String characters are encoded as UTF-8 bytes and
 * the input source is explicitly marked as UTF-8. In this way the parser is told the actual encoding of the bytes
 * instead of relying on the encoding pseudo-attribute of the XML declaration, which is meaningless once the document
 * is held in a Java string.
 *
 * @param string XML document source.
 * @return newly created XML document.
 */
@Override
public Document parseXML(String string) {
  // Charset based getBytes has no checked exception, so there is no unreachable 'unsupported encoding' branch
  InputSource source = new InputSource(new ByteArrayInputStream(string.getBytes(Charset.forName("UTF-8"))));
  source.setEncoding("UTF-8");
  return loadXML(source);
}
/**
 * Parse XML document, with name space support, from string source. String characters are encoded as UTF-8 bytes and
 * the input source is explicitly marked as UTF-8. In this way the parser is told the actual encoding of the bytes
 * instead of relying on the encoding pseudo-attribute of the XML declaration, which is meaningless once the document
 * is held in a Java string.
 *
 * @param string XML document source.
 * @return newly created XML document.
 */
@Override
public Document parseXMLNS(String string) {
  // Charset based getBytes has no checked exception, so there is no unreachable 'unsupported encoding' branch
  InputSource source = new InputSource(new ByteArrayInputStream(string.getBytes(Charset.forName("UTF-8"))));
  source.setEncoding("UTF-8");
  return loadXMLNS(source);
}
// ----------------------------------------------------
// load XML document from file
/**
 * Load XML document, without name space support, from file. File is opened as bytes stream and loading is delegated
 * to the input stream loader.
 *
 * @param file source file.
 * @return newly created XML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadXML(File file) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadXML(fileStream);
}
/**
 * Load XML document, with name space support, from file. File is opened as bytes stream and loading is delegated to
 * the input stream loader.
 *
 * @param file source file.
 * @return newly created XML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadXMLNS(File file) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadXMLNS(fileStream);
}
// ----------------------------------------------------
// load XML document from input stream
/**
 * Load XML document, without name space support, from bytes stream. Stream is wrapped into an input source and
 * loading is delegated to the input source loader.
 *
 * @param stream source bytes stream.
 * @return newly created XML document.
 */
@Override
public Document loadXML(InputStream stream) {
  InputSource source = new InputSource(stream);
  return loadXML(source);
}
/**
 * Load XML document, with name space support, from bytes stream. Stream is wrapped into an input source and loading
 * is delegated to the input source loader.
 *
 * @param stream source bytes stream.
 * @return newly created XML document.
 */
@Override
public Document loadXMLNS(InputStream stream) {
  InputSource source = new InputSource(stream);
  return loadXMLNS(source);
}
// ----------------------------------------------------
// load XML document from reader
/**
 * Load XML document, without name space support, from characters stream. Reader is wrapped into an input source and
 * loading is delegated to the input source loader.
 *
 * @param reader source characters stream.
 * @return newly created XML document.
 */
@Override
public Document loadXML(Reader reader) {
  InputSource source = new InputSource(reader);
  return loadXML(source);
}
/**
 * Load XML document, with name space support, from characters stream. Reader is wrapped into an input source and
 * loading is delegated to the input source loader.
 *
 * @param reader source characters stream.
 * @return newly created XML document.
 */
@Override
public Document loadXMLNS(Reader reader) {
  InputSource source = new InputSource(reader);
  return loadXMLNS(source);
}
// ----------------------------------------------------
// load XML document from input source
/**
 * Load XML document, without name space support, from input source.
 *
 * @param source input source.
 * @return newly created XML document.
 */
@Override
public Document loadXML(InputSource source) {
  final boolean useNamespace = false;
  return loadXML(source, useNamespace);
}
/**
 * Load XML document, with name space support, from input source.
 *
 * @param source input source.
 * @return newly created XML document.
 */
@Override
public Document loadXMLNS(InputSource source) {
  final boolean useNamespace = true;
  return loadXML(source, useNamespace);
}
/**
 * Parse XML document from given input source using a document builder created without schema. Streams carried by the
 * input source are closed when parsing ends, whether it succeeds or fails. Any exception raised while creating the
 * builder or parsing is rethrown wrapped into a DOM exception.
 *
 * @param source input source,
 * @param useNamespace true if document builder should be name space aware.
 * @return newly created XML document.
 */
private static Document loadXML(InputSource source, boolean useNamespace) {
  try {
    javax.xml.parsers.DocumentBuilder builder = getDocumentBuilder(null, useNamespace);
    org.w3c.dom.Document w3cDocument = builder.parse(source);
    return new DocumentImpl(w3cDocument);
  } catch (Exception cause) {
    throw new DomException(cause);
  } finally {
    // release byte and character streams of the source, if any
    close(source);
  }
}
// ----------------------------------------------------
// load XML document from URL
/**
 * Load XML document, without name space support, from URL.
 *
 * @param url source URL.
 * @return newly created XML document.
 */
@Override
public Document loadXML(URL url) {
  final boolean useNamespace = false;
  return loadXML(url, useNamespace);
}
/**
 * Load XML document, with name space support, from URL.
 *
 * @param url source URL.
 * @return newly created XML document.
 */
@Override
public Document loadXMLNS(URL url) {
  final boolean useNamespace = true;
  return loadXML(url, useNamespace);
}
/**
 * Helper method to load XML document from URL. Opens a connection to given URL, wraps its input stream into an input
 * source and delegates to the input source loader. Stream is closed when loading ends, whether it succeeds or fails.
 * <p>
 * Failures are reported as DOM exception. A DOM exception already raised by the delegate loader is rethrown as it is,
 * so that the root cause is not buried under a second DOM exception layer.
 *
 * @param url source URL,
 * @param useNamespace flag to control name space awareness.
 * @return newly created XML document.
 */
private Document loadXML(URL url, boolean useNamespace) {
  InputStream stream = null;
  try {
    stream = url.openConnection().getInputStream();
    InputSource source = new InputSource(stream);
    return useNamespace ? loadXMLNS(source) : loadXML(source);
  } catch (DomException e) {
    // delegate loader already converted its failure; do not wrap it once more
    throw e;
  } catch (Exception e) {
    throw new DomException(e);
  } finally {
    // closing an already closed stream is harmless; this covers the case when delegate loader was not reached
    close(stream);
  }
}
// ----------------------------------------------------
// create empty HTML document
/**
 * Create empty HTML document backed by a new HTML document implementation instance.
 *
 * @return newly created HTML document.
 */
@Override
public Document createHTML() {
  HTMLDocumentImpl emptyDocument = new HTMLDocumentImpl();
  return new DocumentImpl(emptyDocument);
}
// ----------------------------------------------------
// load HTML document from string source
/**
 * Parse HTML document, without name space support, from string source. String characters are encoded as UTF-8 bytes
 * and the same encoding is passed to the stream loader. Encoding the string with platform default character set would
 * corrupt non ASCII characters on platforms where default is not UTF-8.
 *
 * @param string HTML document source.
 * @return newly created HTML document.
 */
@Override
public Document parseHTML(String string) {
  // bytes encoding must match the encoding declared on the loader
  byte[] bytes = string.getBytes(Charset.forName("UTF-8"));
  return loadHTML(new ByteArrayInputStream(bytes), "UTF-8");
}
/**
 * Parse HTML document, with name space support, from string source. String characters are encoded as UTF-8 bytes and
 * the same encoding is passed to the stream loader. Encoding the string with platform default character set would
 * corrupt non ASCII characters on platforms where default is not UTF-8.
 *
 * @param string HTML document source.
 * @return newly created HTML document.
 */
@Override
public Document parseHTMLNS(String string) {
  // bytes encoding must match the encoding declared on the loader
  byte[] bytes = string.getBytes(Charset.forName("UTF-8"));
  return loadHTMLNS(new ByteArrayInputStream(bytes), "UTF-8");
}
// ----------------------------------------------------
// load HTML document from file
/**
 * Load HTML document, without name space support, from file. File is opened as bytes stream and loading is delegated
 * to the input stream loader.
 *
 * @param file source file.
 * @return newly created HTML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadHTML(File file) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadHTML(fileStream);
}
/**
 * Load HTML document, with name space support, from file. File is opened as bytes stream and loading is delegated to
 * the input stream loader.
 *
 * @param file source file.
 * @return newly created HTML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadHTMLNS(File file) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadHTMLNS(fileStream);
}
/**
 * Load HTML document, without name space support, from file using given characters encoding.
 *
 * @param file source file,
 * @param encoding characters encoding passed to the input stream loader.
 * @return newly created HTML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadHTML(File file, String encoding) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadHTML(fileStream, encoding);
}
/**
 * Load HTML document, with name space support, from file using given characters encoding.
 *
 * @param file source file,
 * @param encoding characters encoding passed to the input stream loader.
 * @return newly created HTML document.
 * @throws FileNotFoundException if file cannot be opened for reading.
 */
@Override
public Document loadHTMLNS(File file, String encoding) throws FileNotFoundException {
  InputStream fileStream = new FileInputStream(file);
  return loadHTMLNS(fileStream, encoding);
}
// ----------------------------------------------------
// load HTML document from input stream
/**
 * Load HTML document, without name space support, from bytes stream. Stream is wrapped into an input source and
 * loading is delegated to the input source loader.
 *
 * @param stream source bytes stream.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(InputStream stream) {
  InputSource source = new InputSource(stream);
  return loadHTML(source);
}
/**
 * Load HTML document, with name space support, from bytes stream. Stream is wrapped into an input source and loading
 * is delegated to the input source loader.
 *
 * @param stream source bytes stream.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(InputStream stream) {
  InputSource source = new InputSource(stream);
  return loadHTMLNS(source);
}
/**
 * Load HTML document, without name space support, from bytes stream using given characters encoding.
 *
 * @param stream source bytes stream,
 * @param encoding characters encoding set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(InputStream stream, String encoding) {
  InputSource source = new InputSource(stream);
  return loadHTML(source, encoding);
}
/**
 * Load HTML document, with name space support, from bytes stream using given characters encoding.
 *
 * @param stream source bytes stream,
 * @param encoding characters encoding set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(InputStream stream, String encoding) {
  InputSource source = new InputSource(stream);
  return loadHTMLNS(source, encoding);
}
// ----------------------------------------------------
// load HTML document from reader
/**
 * Load HTML document, without name space support, from characters stream. Name of the platform default character set
 * is passed as encoding to the reader loader with explicit encoding.
 *
 * @param reader source characters stream.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(Reader reader) {
  String defaultEncoding = Charset.defaultCharset().name();
  return loadHTML(reader, defaultEncoding);
}
/**
 * Load HTML document, with name space support, from characters stream. Name of the platform default character set is
 * passed as encoding to the reader loader with explicit encoding.
 *
 * @param reader source characters stream.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(Reader reader) {
  String defaultEncoding = Charset.defaultCharset().name();
  return loadHTMLNS(reader, defaultEncoding);
}
/**
 * Load HTML document, without name space support, from characters stream and given encoding. Reader is wrapped into
 * an input source and loading is delegated to the input source loader.
 *
 * @param reader source characters stream,
 * @param encoding characters encoding set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(Reader reader, String encoding) {
  InputSource source = new InputSource(reader);
  return loadHTML(source, encoding);
}
/**
 * Load HTML document, with name space support, from characters stream and given encoding. Reader is wrapped into an
 * input source and loading is delegated to the input source loader.
 *
 * @param reader source characters stream,
 * @param encoding characters encoding set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(Reader reader, String encoding) {
  InputSource source = new InputSource(reader);
  return loadHTMLNS(source, encoding);
}
// ----------------------------------------------------
// load HTML document from input source
/**
 * Load HTML document, without name space support, from input source, using UTF-8 as characters encoding.
 *
 * @param source input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(InputSource source) {
  final String encoding = "UTF-8";
  return loadHTML(source, encoding);
}
/**
 * Load HTML document, with name space support, from input source, using UTF-8 as characters encoding.
 *
 * @param source input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(InputSource source) {
  final String encoding = "UTF-8";
  return loadHTMLNS(source, encoding);
}
/**
 * Load HTML document, without name space support, from input source and given encoding. Encoding is set on the input
 * source before parsing. Streams carried by the input source are closed when parsing ends, whether it succeeds or
 * fails, and parsing failures are rethrown wrapped into a DOM exception.
 *
 * @param source input source,
 * @param encoding characters encoding to set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(InputSource source, String encoding) {
  source.setEncoding(encoding);
  final Document document;
  try {
    document = loadHTML(source, false);
  } catch (Exception cause) {
    throw new DomException(cause);
  } finally {
    close(source);
  }
  return document;
}
/**
 * Load HTML document, with name space support, from input source and given encoding. Encoding is set on the input
 * source before parsing. Streams carried by the input source are closed when parsing ends, whether it succeeds or
 * fails, and parsing failures are rethrown wrapped into a DOM exception.
 *
 * @param source input source,
 * @param encoding characters encoding to set on the input source.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(InputSource source, String encoding) {
  source.setEncoding(encoding);
  final Document document;
  try {
    document = loadHTML(source, true);
  } catch (Exception cause) {
    throw new DomException(cause);
  } finally {
    close(source);
  }
  return document;
}
/**
 * Parse HTML document from input source using a newly created HTML DOM parser. This helper neither sets the encoding
 * on the input source nor closes it; both are left to the caller.
 *
 * @param source input source,
 * @param useNamespace value for parser name spaces feature.
 * @return newly created HTML document.
 * @throws SAXException if parser feature cannot be set or parsing fails,
 * @throws IOException if reading from input source fails.
 */
private static Document loadHTML(InputSource source, boolean useNamespace) throws SAXException, IOException {
  DOMParser htmlParser = new DOMParser();
  // see http://nekohtml.sourceforge.net/faq.html#hierarchy
  htmlParser.setFeature(FEAT_NAMESPACES, useNamespace);
  htmlParser.parse(source);

  org.w3c.dom.Document w3cDocument = htmlParser.getDocument();
  return new DocumentImpl(w3cDocument);
}
// ----------------------------------------------------
// load HTML document from URL
/**
 * Load HTML document, without name space support, from URL.
 *
 * @param url source URL.
 * @return newly created HTML document.
 */
@Override
public Document loadHTML(URL url) {
  final boolean useNamespace = false;
  return loadHTML(url, useNamespace);
}
/**
 * Load HTML document, with name space support, from URL.
 *
 * @param url source URL.
 * @return newly created HTML document.
 */
@Override
public Document loadHTMLNS(URL url) {
  final boolean useNamespace = true;
  return loadHTML(url, useNamespace);
}
/**
 * Load HTML document from URL. Opens a connection to given URL and parses its input stream with the HTML parser
 * helper; no encoding is set on the input source by this method. Stream is closed when loading ends, whether it
 * succeeds or fails, and any failure is rethrown wrapped into a DOM exception.
 *
 * @param url source URL of the HTML document,
 * @param useNamespace true if loaded document should have name space support.
 * @return newly created and loaded document instance.
 */
private static Document loadHTML(URL url, boolean useNamespace) {
  InputStream urlStream = null;
  try {
    urlStream = url.openConnection().getInputStream();
    InputSource source = new InputSource(urlStream);
    return loadHTML(source, useNamespace);
  } catch (Exception cause) {
    throw new DomException(cause);
  } finally {
    // null safe: stream is still null if connection failed
    close(urlStream);
  }
}
// ----------------------------------------------------
/**
 * Close both the byte stream and the character stream of given input source, whichever is present. Null input source
 * is silently ignored.
 *
 * @param source input source to be closed, possibly null.
 */
private static void close(InputSource source) {
  if (source == null) {
    return;
  }
  InputStream byteStream = source.getByteStream();
  if (byteStream != null) {
    close(byteStream);
  }
  Reader characterStream = source.getCharacterStream();
  if (characterStream != null) {
    close(characterStream);
  }
}
/**
 * Close given closeable, if not null, rethrowing IO exception as unchecked DOM exception.
 *
 * @param closeable closeable to close, possibly null.
 */
private static void close(Closeable closeable) {
  if (closeable == null) {
    return;
  }
  try {
    closeable.close();
  } catch (IOException ioError) {
    throw new DomException(ioError);
  }
}
/**
 * Error handler for XML document builders. Every parser notification is written to the builder class logger, at a
 * level matching notification severity; handler itself does not throw.
 *
 * @author Iulian Rotaru
 */
static class ErrorHandlerImpl implements ErrorHandler {
  /**
   * Write parser warning to builder class logger, at warning level.
   */
  @Override
  public void warning(SAXParseException exception) throws SAXException {
    log.warn(exception);
  }

  /**
   * Write parser error to builder class logger, at error level.
   */
  @Override
  public void error(SAXParseException exception) throws SAXException {
    log.error(exception);
  }

  /**
   * Write parser fatal error to builder class logger, at fatal level.
   */
  @Override
  public void fatalError(SAXParseException exception) throws SAXException {
    log.fatal(exception);
  }
}
/**
 * Create and configure an XML document builder. Factory is always set to ignore comments and element content
 * whitespace and to coalesce CDATA sections. Returned builder has this package entity resolver and error handler
 * installed.
 *
 * @param schema optional XML schema, null if schema validation is not wanted,
 * @param useNamespace name space awareness flag, used only if schema is null; with schema, name space is always on.
 * @return configured XML document builder.
 * @throws ParserConfigurationException if setting a factory feature or creating the builder fails.
 */
private static javax.xml.parsers.DocumentBuilder getDocumentBuilder(Schema schema, boolean useNamespace) throws ParserConfigurationException {
  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  factory.setIgnoringComments(true);
  factory.setIgnoringElementContentWhitespace(true);
  factory.setCoalescing(true);

  if (schema == null) {
    // no schema: turn off parser XML schema support, which original author notes is enabled by default
    factory.setFeature(FEAT_SCHEMA_VALIDATION, false);
    factory.setValidating(false);
    factory.setNamespaceAware(useNamespace);
  } else {
    // with schema in use, a DOCTYPE declaration in the XML document is a fatal error
    factory.setFeature(FEAT_DOCTYPE_DECL, true);

    // As per document builder factory API, 'validating' controls only DTD validation. For schema languages like
    // W3C XML Schema or RELAX NG, parser is left non-validating and the schema is attached via setSchema(Schema).
    factory.setValidating(false);

    // XML schema validation requires name space support
    factory.setFeature(FEAT_SCHEMA_VALIDATION, true);
    factory.setNamespaceAware(true);
    factory.setSchema(schema);
  }

  javax.xml.parsers.DocumentBuilder builder = factory.newDocumentBuilder();
  builder.setEntityResolver(new EntityResolverImpl());
  builder.setErrorHandler(new ErrorHandlerImpl());
  return builder;
}
}