// org.mozilla.javascript.xmlimpl.XmlProcessor, from the rhino-runtime artifact (Maven / Gradle / Ivy).
// Rhino JavaScript runtime jar; excludes tools and the JSR-223 Script Engine wrapper.
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
package org.mozilla.javascript.xmlimpl;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingDeque;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.ScriptRuntime;
import org.w3c.dom.Attr;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node; // Disambiguate from org.mozilla.javascript.Node
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXParseException;
/**
 * Parses XML source text into DOM nodes and serializes DOM nodes back to text for the E4X
 * implementation in this package.
 *
 * <p>The five boolean/int settings (ignore comments, ignore processing instructions, ignore
 * whitespace, pretty printing, pretty indent) are serialized with the instance. The JAXP factories
 * and the {@link DocumentBuilder} pool are transient and are rebuilt both by the constructor and
 * after deserialization.
 */
class XmlProcessor implements Serializable {
    private static final long serialVersionUID = 6903514433204808713L;

    private boolean ignoreComments;
    private boolean ignoreProcessingInstructions;
    private boolean ignoreWhitespace;
    private boolean prettyPrint;
    private int prettyIndent;

    // JAXP objects are not serializable; they are recreated by initTransientState().
    private transient javax.xml.parsers.DocumentBuilderFactory dom;
    private transient javax.xml.transform.TransformerFactory xform;
    private transient LinkedBlockingDeque<DocumentBuilder> documentBuilderPool;

    private RhinoSAXErrorHandler errorHandler = new RhinoSAXErrorHandler();

    /**
     * Restores the serialized settings and then rebuilds the transient JAXP state.
     *
     * @param stream the stream this instance is being deserialized from
     * @throws IOException if the stream cannot be read
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        initTransientState();
    }

    /**
     * Creates the parser and transformer factories plus the builder pool. Shared by the
     * constructor and {@link #readObject(ObjectInputStream)} so both paths configure the factories
     * identically.
     */
    private void initTransientState() {
        this.dom = DocumentBuilderFactory.newInstance();
        this.dom.setNamespaceAware(true);
        this.dom.setIgnoringComments(false);

        // Create the TransformerFactory and secure it against XSLT attacks in case a malicious
        // node is handed to toXMLString.
        this.xform = javax.xml.transform.TransformerFactory.newInstance();

        // Secure configuration is applied when there is no current Context, or when the current
        // Context has the secure-parsing feature switched on.
        Context ctx = Context.getCurrentContext();
        if (ctx == null || ctx.hasFeature(Context.FEATURE_ENABLE_XML_SECURE_PARSING)) {
            configureSecureDBF(this.dom);
            configureSecureTF(this.xform);
        }

        int poolSize = Runtime.getRuntime().availableProcessors() * 2;
        this.documentBuilderPool = new LinkedBlockingDeque<>(poolSize);
    }

    /**
     * Hardens a {@link DocumentBuilderFactory} against XXE and SSRF attacks.
     *
     * @param dbf the factory to configure
     * @throws RuntimeException if the mandatory secure-processing feature cannot be enabled
     */
    private void configureSecureDBF(DocumentBuilderFactory dbf) {
        try {
            // This feature is required to be supported by all DocumentBuilderFactories.
            dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            // Disallow XIncludeAware as it is an SSRF target using xi:include.
            // This should also be supported on all XML processors.
            dbf.setXIncludeAware(false);
        } catch (ParserConfigurationException e) {
            throw new RuntimeException(
                    "XML parser (DocumentBuilderFactory) cannot be securely configured.", e);
        }

        // The rest of these features should be set for the best security by default.
        // However, not all XML processing implementations support them.
        // So we will attempt to set each one but continue if we can't.
        try {
            // Prevent File attacks in DBF
            // Disallow all doctypes, removing all ENTITY-type tags as a vector
            dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        } catch (ParserConfigurationException e) {
            // Ignore this, because it will not work on all implementations
        }
        try {
            // Prevent SSRF attacks in DBF
            // Do not load external dtds, if the underlying DocBuilderFactory is set for a
            // validation mode
            dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        } catch (ParserConfigurationException e) {
            // Ignore this, because it will not work on all implementations
        }
    }

    /**
     * Hardens a {@code TransformerFactory} against XXE and SSRF attacks.
     *
     * @param xform the factory to configure
     * @throws RuntimeException if the mandatory secure-processing feature cannot be enabled
     */
    private void configureSecureTF(javax.xml.transform.TransformerFactory xform) {
        try {
            // Disallow all XXEs and SSRF via external calls for DTDs or Stylesheets.
            // This feature is required to be supported by all TransformerFactory implementations.
            xform.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        } catch (TransformerConfigurationException e) {
            throw new RuntimeException(
                    "XML parser (TransformerFactory) cannot be securely configured.", e);
        }

        // These next parameters make extra-sure that we have a secure configuration,
        // but are not supported on all implementations.
        try {
            xform.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
        } catch (IllegalArgumentException e) {
            // Ignore this, because it will not work on all implementations
        }
        try {
            xform.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
        } catch (IllegalArgumentException e) {
            // Ignore this, because it will not work on all implementations
        }
    }

    /**
     * SAX error handler that turns parse errors into script-level {@code TypeError}s and forwards
     * parse warnings to {@link Context#reportWarning(String)}.
     */
    private static class RhinoSAXErrorHandler implements ErrorHandler, Serializable {
        private static final long serialVersionUID = 6918417235413084055L;

        private void throwError(SAXParseException e) {
            // NOTE(review): the "- 1" presumably compensates for how the parsed text is laid out
            // relative to the script source; confirm against ScriptRuntime.constructError.
            throw ScriptRuntime.constructError("TypeError", e.getMessage(), e.getLineNumber() - 1);
        }

        @Override
        public void error(SAXParseException e) {
            throwError(e);
        }

        @Override
        public void fatalError(SAXParseException e) {
            throwError(e);
        }

        @Override
        public void warning(SAXParseException e) {
            Context.reportWarning(e.getMessage());
        }
    }

    /** Creates a processor with the default settings and freshly configured JAXP factories. */
    XmlProcessor() {
        setDefault();
        initTransientState();
    }

    /**
     * Resets every setting to its default: comments, processing instructions and whitespace are
     * ignored, pretty printing is on, and the pretty indent is 2.
     */
    final void setDefault() {
        this.setIgnoreComments(true);
        this.setIgnoreProcessingInstructions(true);
        this.setIgnoreWhitespace(true);
        this.setPrettyPrinting(true);
        this.setPrettyIndent(2);
    }

    final void setIgnoreComments(boolean b) {
        this.ignoreComments = b;
    }

    final void setIgnoreWhitespace(boolean b) {
        this.ignoreWhitespace = b;
    }

    final void setIgnoreProcessingInstructions(boolean b) {
        this.ignoreProcessingInstructions = b;
    }

    final void setPrettyPrinting(boolean b) {
        this.prettyPrint = b;
    }

    final void setPrettyIndent(int i) {
        this.prettyIndent = i;
    }

    final boolean isIgnoreComments() {
        return ignoreComments;
    }

    final boolean isIgnoreProcessingInstructions() {
        return ignoreProcessingInstructions;
    }

    final boolean isIgnoreWhitespace() {
        return ignoreWhitespace;
    }

    final boolean isPrettyPrinting() {
        return prettyPrint;
    }

    final int getPrettyIndent() {
        return prettyIndent;
    }

    /**
     * Normalizes line endings to {@code '\n'}: a {@code "\r\n"} pair loses its {@code '\r'}, and a
     * {@code '\r'} not followed by {@code '\n'} (including one at the very end of the string) is
     * replaced by {@code '\n'}.
     *
     * @param rv the text to normalize
     * @return the text with only {@code '\n'} line endings
     */
    private String toXmlNewlines(String rv) {
        int length = rv.length();
        StringBuilder nl = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char ch = rv.charAt(i);
            if (ch != '\r') {
                nl.append(ch);
                continue;
            }
            // Bounds-check before peeking ahead so a trailing '\r' cannot index past the end.
            boolean followedByLineFeed = i + 1 < length && rv.charAt(i + 1) == '\n';
            if (!followedByLineFeed) {
                // Macintosh, substitute \n
                nl.append('\n');
            }
            // DOS: skip the \r; the following \n is appended on the next iteration.
        }
        return nl.toString();
    }

    private javax.xml.parsers.DocumentBuilderFactory getDomFactory() {
        return dom;
    }

    /**
     * Takes a builder from the pool, or creates a new one without blocking when the pool is empty,
     * and installs this processor's error handler on it.
     *
     * @return a builder ready for use
     * @throws ParserConfigurationException if a new builder cannot be created
     */
    private DocumentBuilder getDocumentBuilderFromPool() throws ParserConfigurationException {
        DocumentBuilder builder = documentBuilderPool.pollFirst();
        if (builder == null) {
            builder = getDomFactory().newDocumentBuilder();
        }
        builder.setErrorHandler(errorHandler);
        return builder;
    }

    /**
     * Puts a builder back into the pool if it can be reset. The pool capacity is the number of
     * processors times two; when the pool is full the builder is simply dropped.
     *
     * @param db the builder to return
     */
    private void returnDocumentBuilderToPool(DocumentBuilder db) {
        try {
            db.reset();
            // DocumentBuilders are supposed to be namespace-aware.
            // This is a sanity check for DocumentBuilder's resettability (a known bug in Android).
            if (!db.isNamespaceAware()) {
                return;
            }
            documentBuilderPool.offerFirst(db);
        } catch (UnsupportedOperationException e) {
            // document builders that don't support reset() can't be pooled
        }
    }

    /**
     * Recursively collects every processing instruction in the subtree rooted at {@code node}.
     *
     * @param list receives the processing-instruction nodes found
     * @param node the subtree root
     */
    private void addProcessingInstructionsTo(List<Node> list, Node node) {
        if (node instanceof ProcessingInstruction) {
            list.add(node);
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            for (int i = 0; i < children.getLength(); i++) {
                addProcessingInstructionsTo(list, children.item(i));
            }
        }
    }

    /**
     * Recursively collects every comment in the subtree rooted at {@code node}.
     *
     * @param list receives the comment nodes found
     * @param node the subtree root
     */
    private void addCommentsTo(List<Node> list, Node node) {
        if (node instanceof Comment) {
            list.add(node);
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            for (int i = 0; i < children.getLength(); i++) {
                // Must recurse into this method: descending via addProcessingInstructionsTo
                // would miss nested comments and collect nested processing instructions instead.
                addCommentsTo(list, children.item(i));
            }
        }
    }

    /**
     * Recursively trims every text node in the subtree rooted at {@code node} and collects the ones
     * that end up empty.
     *
     * @param toRemove receives the text nodes whose data is empty after trimming
     * @param node the subtree root
     */
    private void addTextNodesToRemoveAndTrim(List<Node> toRemove, Node node) {
        if (node instanceof Text) {
            Text text = (Text) node;
            boolean BUG_369394_IS_VALID = false;
            if (!BUG_369394_IS_VALID) {
                text.setData(text.getData().trim());
            } else {
                if (text.getData().trim().length() == 0) {
                    text.setData("");
                }
            }
            if (text.getData().length() == 0) {
                toRemove.add(node);
            }
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            for (int i = 0; i < children.getLength(); i++) {
                addTextNodesToRemoveAndTrim(toRemove, children.item(i));
            }
        }
    }

    /** Detaches each node in {@code nodes} from its parent. */
    private static void removeFromParents(List<Node> nodes) {
        for (Node node : nodes) {
            node.getParentNode().removeChild(node);
        }
    }

    /**
     * Parses {@code xml} into a single DOM node, applying the ignore-* settings.
     *
     * <p>The text is wrapped in a synthetic {@code parent} element that declares {@code
     * defaultNamespaceUri} as the default namespace, so that fragments without a single root
     * element can be parsed; the wrapper's only child is detached and returned.
     *
     * @param defaultNamespaceUri the namespace URI placed on the synthetic wrapper element
     * @param xml the XML source text
     * @return the parsed node, or an empty text node when nothing remains after filtering
     * @throws org.xml.sax.SAXException if the parser reports an error that the installed error
     *     handler does not convert into a script error
     */
    final Node toXml(String defaultNamespaceUri, String xml) throws org.xml.sax.SAXException {
        // See ECMA357 10.3.1
        DocumentBuilder builder = null;
        try {
            String syntheticXml =
                    "<parent xmlns=\"" + defaultNamespaceUri + "\">" + xml + "</parent>";
            builder = getDocumentBuilderFromPool();
            Document document =
                    builder.parse(
                            new org.xml.sax.InputSource(new java.io.StringReader(syntheticXml)));
            if (ignoreProcessingInstructions) {
                List<Node> list = new ArrayList<>();
                addProcessingInstructionsTo(list, document);
                removeFromParents(list);
            }
            if (ignoreComments) {
                List<Node> list = new ArrayList<>();
                addCommentsTo(list, document);
                removeFromParents(list);
            }
            if (ignoreWhitespace) {
                // Apparently JAXP setIgnoringElementContentWhitespace() has a different meaning,
                // it appears from the Javadoc
                // Refers to element-only content models, which means we would need to have a
                // validating parser and DTD or schema
                // so that it would know which whitespace to ignore.
                // Instead we will try to delete it ourselves.
                List<Node> list = new ArrayList<>();
                addTextNodesToRemoveAndTrim(list, document);
                removeFromParents(list);
            }
            Element wrapper = document.getDocumentElement();
            NodeList rv = wrapper.getChildNodes();
            if (rv.getLength() > 1) {
                throw ScriptRuntime.constructError(
                        "SyntaxError", "XML objects may contain at most one node.");
            } else if (rv.getLength() == 0) {
                return document.createTextNode("");
            } else {
                Node node = rv.item(0);
                wrapper.removeChild(node);
                return node;
            }
        } catch (java.io.IOException e) {
            // Reading from an in-memory StringReader should not fail; keep the cause anyway.
            throw new RuntimeException("Unreachable.", e);
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            throw new RuntimeException(e);
        } finally {
            if (builder != null) {
                returnDocumentBuilderToPool(builder);
            }
        }
    }

    /**
     * Creates a new, empty DOM document using a pooled builder.
     *
     * @return the new document
     * @throws RuntimeException wrapping a {@link ParserConfigurationException} if no builder can
     *     be created
     */
    Document newDocument() {
        DocumentBuilder builder = null;
        try {
            // TODO Should this use XML settings?
            builder = getDocumentBuilderFromPool();
            return builder.newDocument();
        } catch (javax.xml.parsers.ParserConfigurationException ex) {
            // TODO How to handle these runtime errors?
            throw new RuntimeException(ex);
        } finally {
            if (builder != null) {
                returnDocumentBuilderToPool(builder);
            }
        }
    }

    /**
     * Serializes {@code node} with an identity transform: no XML declaration, no indentation, XML
     * output method, and line endings normalized to {@code '\n'}.
     *
     * @param node the node to serialize
     * @return the serialized text
     * @throws RuntimeException wrapping any {@code TransformerException}
     */
    // TODO Cannot remember what this is for, so whether it should use settings or not
    private String toString(Node node) {
        javax.xml.transform.dom.DOMSource source = new javax.xml.transform.dom.DOMSource(node);
        java.io.StringWriter writer = new java.io.StringWriter();
        javax.xml.transform.stream.StreamResult result =
                new javax.xml.transform.stream.StreamResult(writer);
        try {
            javax.xml.transform.Transformer transformer = xform.newTransformer();
            transformer.setOutputProperty(
                    javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes");
            transformer.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "no");
            transformer.setOutputProperty(javax.xml.transform.OutputKeys.METHOD, "xml");
            transformer.transform(source, result);
        } catch (javax.xml.transform.TransformerException ex) {
            // Also covers TransformerConfigurationException, which is a subclass.
            // TODO How to handle these runtime errors?
            throw new RuntimeException(ex);
        }
        return toXmlNewlines(writer.toString());
    }

    /**
     * Escapes a value for use inside a double-quoted XML attribute by serializing a throwaway
     * element carrying it as an attribute and extracting the text between the quotes.
     *
     * @param value the value to escape; converted with {@code ScriptRuntime.toString}
     * @return the escaped attribute text, or {@code ""} for an empty value
     */
    String escapeAttributeValue(Object value) {
        String text = ScriptRuntime.toString(value);
        if (text.length() == 0) {
            return "";
        }
        Document doc = newDocument();
        Element e = doc.createElement("a");
        e.setAttribute("b", text);
        String elementText = toString(e);
        int begin = elementText.indexOf('"');
        int end = elementText.lastIndexOf('"');
        return elementText.substring(begin + 1, end);
    }

    /**
     * Escapes a value for use as XML text content. An {@code XMLObjectImpl} is returned as its own
     * {@code toXMLString()}; anything else is serialized as the content of a throwaway element
     * and the text between the start and end tags is extracted.
     *
     * @param value the value to escape
     * @return the escaped text
     */
    String escapeTextValue(Object value) {
        if (value instanceof XMLObjectImpl) {
            return ((XMLObjectImpl) value).toXMLString();
        }
        String text = ScriptRuntime.toString(value);
        if (text.length() == 0) {
            return text;
        }
        Document doc = newDocument();
        Element e = doc.createElement("a");
        e.setTextContent(text);
        String elementText = toString(e);
        int begin = elementText.indexOf('>') + 1;
        int end = elementText.lastIndexOf('<');
        return (begin < end) ? elementText.substring(begin, end) : "";
    }

    private String escapeElementValue(String s) {
        // TODO Check this
        return escapeTextValue(s);
    }

    /**
     * Serializes a deep copy of {@code element}, indenting the copy first when pretty printing is
     * enabled so the original tree is left untouched.
     */
    private String elementToXmlString(Element element) {
        // TODO My goodness ECMA is complicated (see 10.2.1). We'll try this first.
        Element copy = (Element) element.cloneNode(true);
        if (prettyPrint) {
            beautifyElement(copy, 0);
        }
        return toString(copy);
    }

    /**
     * Converts a DOM node to its XML string form.
     *
     * <p>Text and attribute nodes yield their escaped value (text is trimmed first when pretty
     * printing is on), comments and processing instructions are written with their delimiters,
     * and any other node is treated as an element.
     *
     * @param node the node to serialize
     * @return the XML text for the node
     * @throws ClassCastException if the node is none of the handled kinds and is not an {@link
     *     Element}
     */
    final String ecmaToXmlString(Node node) {
        // See ECMA 357 Section 10.2.1
        if (node instanceof Text) {
            String data = ((Text) node).getData();
            // TODO Does Java trim() work same as XMLWhitespace?
            String v = (prettyPrint) ? data.trim() : data;
            return escapeElementValue(v);
        }
        if (node instanceof Attr) {
            String value = ((Attr) node).getValue();
            return escapeAttributeValue(value);
        }
        if (node instanceof Comment) {
            return "<!--" + ((Comment) node).getNodeValue() + "-->";
        }
        if (node instanceof ProcessingInstruction) {
            ProcessingInstruction pi = (ProcessingInstruction) node;
            return "<?" + pi.getTarget() + " " + pi.getData() + "?>";
        }
        return elementToXmlString((Element) node);
    }

    /**
     * Inserts whitespace text nodes into {@code e} (in place) so that its serialized form is
     * indented. Children are indented when the element has more than one child or has any
     * non-text child; child elements are processed recursively one level deeper.
     *
     * @param e the element to indent
     * @param indent the number of spaces at which {@code e} itself sits
     */
    private void beautifyElement(Element e, int indent) {
        StringBuilder s = new StringBuilder();
        s.append('\n');
        for (int i = 0; i < indent; i++) {
            s.append(' ');
        }
        String afterContent = s.toString();
        for (int i = 0; i < prettyIndent; i++) {
            s.append(' ');
        }
        String beforeContent = s.toString();

        // We "mark" all the nodes first; if we tried to do this loop otherwise, it would behave
        // unexpectedly (the inserted nodes
        // would contribute to the length and it might never terminate).
        List<Node> toIndent = new ArrayList<>();
        boolean indentChildren = false;
        NodeList children = e.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (i == 1 || !(child instanceof Text)) {
                indentChildren = true;
            }
            toIndent.add(child);
        }
        if (indentChildren) {
            for (Node child : toIndent) {
                e.insertBefore(e.getOwnerDocument().createTextNode(beforeContent), child);
            }
        }

        // Snapshot the child elements before recursing.
        NodeList nodes = e.getChildNodes();
        List<Element> list = new ArrayList<>();
        for (int i = 0; i < nodes.getLength(); i++) {
            if (nodes.item(i) instanceof Element) {
                list.add((Element) nodes.item(i));
            }
        }
        for (Element elem : list) {
            beautifyElement(elem, indent + prettyIndent);
        }
        if (indentChildren) {
            e.appendChild(e.getOwnerDocument().createTextNode(afterContent));
        }
    }
}