// org.jvnet.staxex.util.DOMStreamReader Maven / Gradle / Ivy
/*
* Copyright (c) 1997, 2021 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0, which is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.jvnet.staxex.util;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import static org.w3c.dom.Node.*;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.util.Collections;
import java.util.Iterator;
/**
* Create an {@link XMLStreamReader} on top of a DOM tree.
*
*
* Since various libraries as well as users often create "incorrect" DOM nodes,
* this class takes great care to make sure that broken DOM trees are
* nevertheless interpreted correctly.
*
*
* For example, if a DOM level
* 1 tree is passed, each method will attempt to return the correct value
* by using {@link Node#getNodeName()}.
*
*
* Similarly, if DOM is missing explicit namespace declarations,
* this class attempts to emulate necessary declarations.
*
*
* @author [email protected]
* @author Kohsuke Kawaguchi
*/
public class DOMStreamReader implements XMLStreamReader, NamespaceContext {
/**
 * Current DOM node being traversed.
 */
protected Node _current;

/**
 * Starting node of the subtree being traversed.
 */
private Node _start;

/**
 * Named mapping for attributes and NS decls for the current node.
 */
private NamedNodeMap _namedNodeMap;

/**
 * If the reader points at {@link #CHARACTERS the text node},
 * its whole value.
 *
 * <p>
 * This is simply a cache of {@link Text#getWholeText()} of {@link #_current},
 * but when large binary data is sent as base64 text, this can become
 * far from trivial.
 */
protected String wholeText;

/**
 * List of attributes extracted from {@link #_namedNodeMap}.
 * Namespace declarations are kept in the current {@link Scope} instead.
 */
private final FinalArrayList<Attr> _currentAttributes = new FinalArrayList<>();

/**
 * {@link Scope} buffer.
 */
protected Scope[] scopes = new Scope[8];

/**
 * Depth of the current element. The first element gets depth==0.
 * Also used as the index to {@link #scopes}.
 */
protected int depth = 0;

/**
 * State of this reader. Any of the valid states defined in StAX'
 * XMLStreamConstants class.
 */
protected int _state;
/**
 * Namespace declarations on one element.
 *
 * Instances are reused.
 */
protected static final class Scope {
    /**
     * Scope for the parent element.
     */
    final Scope parent;

    /**
     * List of namespace declarations extracted from _namedNodeMap
     */
    final FinalArrayList<Attr> currentNamespaces = new FinalArrayList<>();

    /**
     * Additional namespace declarations obtained as a result of "fixing" DOM tree,
     * which were not part of the original DOM tree.
     *
     * One entry occupies two spaces (prefix followed by URI.)
     */
    final FinalArrayList<String> additionalNamespaces = new FinalArrayList<>();

    Scope(Scope parent) {
        this.parent = parent;
    }

    /**
     * Empties both declaration lists so that this instance can be reused.
     */
    void reset() {
        currentNamespaces.clear();
        additionalNamespaces.clear();
    }

    /**
     * Returns the number of declarations on this scope: the real ones plus
     * the emulated ones (which occupy two list slots each).
     */
    int getNamespaceCount() {
        return currentNamespaces.size() + additionalNamespaces.size() / 2;
    }

    /**
     * Returns the prefix of the index-th declaration. Real declarations come
     * first, followed by the emulated ones.
     *
     * @return the prefix; {@code null} for a real {@code xmlns} (default namespace) declaration
     */
    String getNamespacePrefix(int index) {
        int sz = currentNamespaces.size();
        if (index < sz) {
            Attr attr = currentNamespaces.get(index);
            String result = attr.getLocalName();
            if (result == null) {
                // DOM level 1 node: derive the prefix from the qualified name.
                // QName.valueOf() does not split "xmlns:foo", so strip the
                // part up to the colon by hand.
                result = localPartOf(attr.getNodeName());
            }
            return result.equals("xmlns") ? null : result;
        } else {
            return additionalNamespaces.get((index - sz) * 2);
        }
    }

    /**
     * Returns the namespace URI of the index-th declaration, using the same
     * ordering as {@link #getNamespacePrefix(int)}.
     */
    String getNamespaceURI(int index) {
        int sz = currentNamespaces.size();
        if (index < sz) {
            return currentNamespaces.get(index).getValue();
        } else {
            return additionalNamespaces.get((index - sz) * 2 + 1);
        }
    }

    /**
     * Returns the prefix bound to the given URI, or null.
     * This method recurses to the parent.
     */
    String getPrefix(String nsUri) {
        for (Scope sp = this; sp != null; sp = sp.parent) {
            for (int i = sp.currentNamespaces.size() - 1; i >= 0; i--) {
                String result = getPrefixForAttr(sp.currentNamespaces.get(i), nsUri);
                if (result != null) {
                    return result;
                }
            }
            for (int i = sp.additionalNamespaces.size() - 2; i >= 0; i -= 2) {
                if (sp.additionalNamespaces.get(i + 1).equals(nsUri)) {
                    return sp.additionalNamespaces.get(i);
                }
            }
        }
        return null;
    }

    /**
     * Returns the namespace URI bound by the given prefix.
     * This method recurses to the parent and returns null if no binding is found.
     *
     * @param prefix
     *      Prefix to look up.
     */
    String getNamespaceURI(String prefix) {
        String nsDeclName = prefix.length() == 0 ? "xmlns" : "xmlns:" + prefix;
        for (Scope sp = this; sp != null; sp = sp.parent) {
            for (int i = sp.currentNamespaces.size() - 1; i >= 0; i--) {
                Attr a = sp.currentNamespaces.get(i);
                if (a.getNodeName().equals(nsDeclName)) {
                    return a.getValue();
                }
            }
            for (int i = sp.additionalNamespaces.size() - 2; i >= 0; i -= 2) {
                if (sp.additionalNamespaces.get(i).equals(prefix)) {
                    return sp.additionalNamespaces.get(i + 1);
                }
            }
        }
        return null;
    }

    /**
     * Returns the part of a qualified name after the first colon, or the
     * whole name when it contains no colon.
     */
    private static String localPartOf(String qualifiedName) {
        int colon = qualifiedName.indexOf(':');
        return colon < 0 ? qualifiedName : qualifiedName.substring(colon + 1);
    }
}
/**
 * Creates a reader that is not yet positioned on any node.
 * The body is empty; {@link #setCurrentNode(Node)} is what assigns the
 * traversal state.
 */
public DOMStreamReader() {
}
/**
 * Creates a reader positioned on the given node by delegating to
 * {@link #setCurrentNode(Node)}.
 *
 * @param node the node at which the traversal starts
 */
public DOMStreamReader(Node node) {
setCurrentNode(node);
}
/**
 * (Re)positions this reader on the given node: installs a fresh root
 * {@link Scope}, resets the depth to 0 and puts the reader into the
 * START_DOCUMENT state.
 *
 * @param node the node that becomes both the start node and the current node
 */
public void setCurrentNode(Node node) {
    _current = node;
    _start = node;
    _state = START_DOCUMENT;

    depth = 0;
    scopes[0] = new Scope(null);

    // verifyDOMIntegrity(node);
    // displayDOM(node, System.out);
}
/**
 * Does nothing; the method body is empty.
 */
@Override
public void close() throws XMLStreamException {
}
/**
 * Called when the current node is an {@link Element}: walks its DOM attribute
 * list (which mixes namespace declarations and real attributes) and sorts the
 * entries into the attribute list and the namespace list of a newly
 * allocated scope. Afterwards any namespace binding that the element or its
 * namespaced attributes rely on but that is not declared gets emulated.
 */
protected void splitAttributes() {
    // start from a clean attribute list and a clean scope for this element
    _currentAttributes.clear();
    final Scope elementScope = allocateScope();

    _namedNodeMap = _current.getAttributes();
    if (_namedNodeMap != null) {
        for (int idx = 0, count = _namedNodeMap.getLength(); idx < count; idx++) {
            final Attr candidate = (Attr) _namedNodeMap.item(idx);
            final String qualifiedName = candidate.getNodeName();
            final boolean isNamespaceDecl =
                    qualifiedName.equals("xmlns") || qualifiedName.startsWith("xmlns:");
            if (isNamespaceDecl) {
                elementScope.currentNamespaces.add(candidate);
            } else {
                _currentAttributes.add(candidate);
            }
        }
    }

    // the element's own namespace first ...
    ensureNs(_current);

    // ... then the attributes, last to first
    int idx = _currentAttributes.size();
    while (--idx >= 0) {
        final Attr attribute = _currentAttributes.get(idx);
        // attributes without a namespace never need a declaration
        if (fixNull(attribute.getNamespaceURI()).length() != 0) {
            ensureNs(attribute);
        }
    }
}
/**
 * Sub-routine of {@link #splitAttributes()}.
 *
 * <p>
 * Checks whether the prefix/URI pair used by the given node is bound in the
 * current scope (or one of its parents). If it is not, the pair is recorded
 * as an additional declaration on the current scope, so that DOM trees built
 * without explicit namespace declarations are still reported consistently.
 * The implicitly bound prefixes "xml" and "xmlns" are never recorded.
 */
private void ensureNs(Node n) {
    final String nodePrefix = fixNull(n.getPrefix());
    final String nodeUri = fixNull(n.getNamespaceURI());

    final Scope current = scopes[depth];
    final String boundUri = current.getNamespaceURI(nodePrefix);

    final boolean alreadyBound;
    if (nodePrefix.length() == 0) {
        // an undeclared default namespace is equivalent to ""
        alreadyBound = fixNull(boundUri).equals(nodeUri);
    } else {
        // nodeUri is never null, so this is false when nothing is bound
        alreadyBound = nodeUri.equals(boundUri);
    }
    if (alreadyBound) {
        return;
    }

    final boolean implicitlyDeclared = nodePrefix.equals("xml") || nodePrefix.equals("xmlns");
    if (implicitlyDeclared) {
        return;
    }

    // record the missing declaration as a (prefix, URI) pair
    current.additionalNamespaces.add(nodePrefix);
    current.additionalNamespaces.add(nodeUri);
}
/**
 * Moves one level deeper and returns the {@link Scope} for that level,
 * for use by {@link #splitAttributes()}. The buffer is doubled when the new
 * depth reaches its length; an existing instance at that depth is reset
 * and reused, otherwise a new one is created with the previous level as parent.
 */
private Scope allocateScope() {
    depth++;
    if (depth == scopes.length) {
        final Scope[] grown = new Scope[scopes.length * 2];
        System.arraycopy(scopes, 0, grown, 0, scopes.length);
        scopes = grown;
    }

    Scope slot = scopes[depth];
    if (slot != null) {
        slot.reset();
        return slot;
    }

    slot = new Scope(scopes[depth - 1]);
    scopes[depth] = slot;
    return slot;
}
/**
 * Returns the number of attributes (namespace declarations excluded) on the
 * current element.
 *
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public int getAttributeCount() {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeCount() called in illegal state");
    }
    return _currentAttributes.size();
}
/**
 * Returns the local name of the index-th attribute. For DOM level 1 nodes,
 * whose local name is null, the value is derived from the node name.
 *
 * NOTE(review): {@code QName.valueOf} does not split a "prefix:local" name,
 * so a prefixed DOM level 1 name is returned with its prefix -- confirm
 * this is intended.
 *
 * @param index position in the current attribute list
 * @return the attribute's local name
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributeLocalName(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeLocalName() called in illegal state");
    }
    final Attr attribute = _currentAttributes.get(index);
    final String local = attribute.getLocalName();
    if (local != null) {
        return local;
    }
    // DOM level 1 fallback
    return QName.valueOf(attribute.getNodeName()).getLocalPart();
}
/**
 * Returns the qualified name of the index-th attribute. When the node has a
 * local name, the QName is built from its namespace URI, local name and
 * prefix (nulls replaced by ""); otherwise (DOM level 1) it is parsed from
 * the node name.
 *
 * @param index position in the current attribute list
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public QName getAttributeName(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeName() called in illegal state");
    }
    final Node attribute = _currentAttributes.get(index);
    final String local = attribute.getLocalName();
    if (local == null) {
        // DOM level 1 fallback
        return QName.valueOf(attribute.getNodeName());
    }
    final String attrPrefix = attribute.getPrefix();
    final String attrUri = attribute.getNamespaceURI();
    return new QName(fixNull(attrUri), local, fixNull(attrPrefix));
}
/**
 * Returns the namespace URI of the index-th attribute, or "" when the
 * node reports none.
 *
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributeNamespace(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeNamespace() called in illegal state");
    }
    return fixNull(_currentAttributes.get(index).getNamespaceURI());
}
/**
 * Returns the prefix of the index-th attribute, or "" when the node
 * reports none.
 *
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributePrefix(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributePrefix() called in illegal state");
    }
    return fixNull(_currentAttributes.get(index).getPrefix());
}
/**
 * Always reports the attribute type "CDATA", regardless of the index.
 *
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributeType(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeType() called in illegal state");
    }
    return "CDATA";
}
/**
 * Returns the value of the index-th attribute.
 *
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributeValue(int index) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeValue() called in illegal state");
    }
    final Attr attribute = _currentAttributes.get(index);
    return attribute.getNodeValue();
}
/**
 * Returns the value of the attribute with the given namespace URI and local name.
 *
 * <p>
 * The DOM lookup is tried first. When it finds nothing and
 * {@code namespaceURI} is null, the attribute list is scanned by local name
 * only, because StAX specifies that a null namespace URI means the
 * namespace is not checked for equality, whereas the DOM lookup with null
 * only matches attributes that have no namespace.
 *
 * @param namespaceURI namespace of the attribute; null matches any namespace
 * @param localName    local name of the attribute
 * @return the attribute value, or null if there is no such attribute
 * @throws IllegalStateException if the reader is not on START_ELEMENT
 */
@Override
public String getAttributeValue(String namespaceURI, String localName) {
    if (_state != START_ELEMENT) {
        throw new IllegalStateException("DOMStreamReader: getAttributeValue() called in illegal state");
    }
    if (_namedNodeMap == null) {
        return null;
    }
    Node attr = _namedNodeMap.getNamedItemNS(namespaceURI, localName);
    if (attr != null) {
        return attr.getNodeValue();
    }
    if (namespaceURI == null) {
        // namespace is a wildcard: match on local name alone
        final int count = getAttributeCount();
        for (int i = 0; i < count; i++) {
            if (getAttributeLocalName(i).equals(localName)) {
                return getAttributeValue(i);
            }
        }
    }
    return null;
}
/**
 * Always returns null; no encoding information is reported by this reader.
 */
@Override
public String getCharacterEncodingScheme() {
return null;
}
/**
 * Reads the content of a text-only element and returns it, leaving the
 * reader on the matching END_ELEMENT.
 *
 * <p>
 * Text from CHARACTERS, CDATA and ENTITY_REFERENCE events is concatenated
 * (a null text is skipped); comments and processing instructions are ignored.
 *
 * @return the accumulated text, possibly empty
 * @throws XMLStreamException if the reader is not on START_ELEMENT, or if a
 *         nested start element, the end of the document or any other
 *         unexpected event is encountered before the end element
 */
@Override
public String getElementText() throws javax.xml.stream.XMLStreamException {
    if (_state != START_ELEMENT) {
        throw new XMLStreamException("DOMStreamReader: getElementText() must be called on START_ELEMENT");
    }
    final StringBuilder content = new StringBuilder();
    for (int eventType = next(); eventType != END_ELEMENT; eventType = next()) {
        switch (eventType) {
            case CHARACTERS:
            case CDATA:
            case ENTITY_REFERENCE: {
                final String text = getText();
                if (text != null) {
                    content.append(text);
                }
                break;
            }
            case PROCESSING_INSTRUCTION:
            case COMMENT:
                // not part of the element text
                break;
            case END_DOCUMENT:
                throw new XMLStreamException(
                        "DOMStreamReader: unexpected end of document when reading element text content");
            case START_ELEMENT:
                throw new XMLStreamException(
                        "DOMStreamReader: element text content may not contain START_ELEMENT");
            default:
                throw new XMLStreamException("DOMStreamReader: Unexpected event type " + eventType);
        }
    }
    return content.toString();
}
/**
 * Always returns null; no encoding information is reported by this reader.
 */
@Override
public String getEncoding() {
return null;
}
/**
 * Returns the current reader state ({@link #_state}).
 */
@Override
public int getEventType() {
return _state;
}
/**
 * Returns the local name of the current event.
 *
 * <p>
 * On START_ELEMENT / END_ELEMENT this is the element's local name; for DOM
 * level 1 nodes, whose local name is null, it is derived from the node name.
 * On ENTITY_REFERENCE it is the node name of the reference.
 *
 * @return the local name
 * @throws IllegalStateException if the reader is in any other state
 */
@Override
public String getLocalName() {
    if (_state == START_ELEMENT || _state == END_ELEMENT) {
        String localName = _current.getLocalName();
        return localName != null ? localName :
            QName.valueOf(_current.getNodeName()).getLocalPart();
    }
    else if (_state == ENTITY_REFERENCE) {
        return _current.getNodeName();
    }
    // the message used to name getAttributeValue(), a copy/paste slip
    throw new IllegalStateException("DOMStreamReader: getLocalName() called in illegal state");
}
/**
 * Returns the shared {@code DummyLocation.INSTANCE}, whatever the reader state.
 */
@Override
public Location getLocation() {
return DummyLocation.INSTANCE;
}
/**
 * Returns the qualified name of the current element. When the node has a
 * local name, the QName is built from its namespace URI, local name and
 * prefix (nulls replaced by ""); otherwise (DOM level 1) it is parsed from
 * the node name.
 *
 * @throws IllegalStateException if the reader is on neither START_ELEMENT nor END_ELEMENT
 */
@Override
public javax.xml.namespace.QName getName() {
    final boolean onElement = _state == START_ELEMENT || _state == END_ELEMENT;
    if (!onElement) {
        throw new IllegalStateException("DOMStreamReader: getName() called in illegal state");
    }
    final String local = _current.getLocalName();
    if (local == null) {
        // DOM level 1 fallback
        return QName.valueOf(_current.getNodeName());
    }
    final String elementPrefix = _current.getPrefix();
    final String elementUri = _current.getNamespaceURI();
    return new QName(fixNull(elementUri), local, fixNull(elementPrefix));
}
/**
 * Returns this reader itself, which also implements {@link NamespaceContext}.
 */
@Override
public NamespaceContext getNamespaceContext() {
return this;
}
/**
 * Returns the scope of the current depth, after checking that the reader is
 * in a state in which namespace information may be queried.
 *
 * Shared by the StAX namespace accessors, which all have this restriction.
 *
 * @throws IllegalStateException if the reader is on neither START_ELEMENT nor END_ELEMENT
 */
private Scope getCheckedScope() {
    switch (_state) {
        case START_ELEMENT:
        case END_ELEMENT:
            return scopes[depth];
        default:
            throw new IllegalStateException("DOMStreamReader: neither on START_ELEMENT nor END_ELEMENT");
    }
}
/**
 * Returns the number of namespace declarations (real and emulated) on the
 * current element.
 *
 * @throws IllegalStateException if the reader is on neither START_ELEMENT nor END_ELEMENT
 */
@Override
public int getNamespaceCount() {
    final Scope scope = getCheckedScope();
    return scope.getNamespaceCount();
}
/**
 * Returns the prefix of the index-th namespace declaration on the current element.
 *
 * @throws IllegalStateException if the reader is on neither START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespacePrefix(int index) {
    final Scope scope = getCheckedScope();
    return scope.getNamespacePrefix(index);
}
/**
 * Returns the URI of the index-th namespace declaration on the current element.
 *
 * @throws IllegalStateException if the reader is on neither START_ELEMENT nor END_ELEMENT
 */
@Override
public String getNamespaceURI(int index) {
    final Scope scope = getCheckedScope();
    return scope.getNamespaceURI(index);
}
/**
 * Returns the namespace URI of the current element ("" when the node
 * reports none), or null when the reader is on neither START_ELEMENT nor
 * END_ELEMENT.
 */
@Override
public String getNamespaceURI() {
    switch (_state) {
        case START_ELEMENT:
        case END_ELEMENT:
            return fixNull(_current.getNamespaceURI());
        default:
            return null;
    }
}
/**
 * Returns the namespace URI bound to the given prefix.
 *
 * <p>
 * The prefixes "xml" and "xmlns" are answered with their fixed URIs. Other
 * prefixes are looked up in the scopes of the traversal first and then in
 * the namespace declarations found on the ancestors of the start node.
 *
 * <p>
 * This method is not particularly fast, but shouldn't be called very
 * often. If we start to use it more, we should keep track of the
 * NS declarations using a NamespaceContext implementation instead.
 *
 * @param prefix the prefix to resolve; "" denotes the default namespace
 * @return the bound namespace URI, or null if the prefix is not bound
 * @throws IllegalArgumentException if {@code prefix} is null
 */
@Override
public String getNamespaceURI(String prefix) {
    if (prefix == null) {
        throw new IllegalArgumentException("DOMStreamReader: getNamespaceURI(String) call with a null prefix");
    }
    else if (prefix.equals("xml")) {
        return "http://www.w3.org/XML/1998/namespace";
    }
    else if (prefix.equals("xmlns")) {
        return "http://www.w3.org/2000/xmlns/";
    }

    // check scopes
    String nsUri = scopes[depth].getNamespaceURI(prefix);
    if (nsUri != null) {
        return nsUri;
    }

    // then ancestors above start node
    Node node = findRootElement();
    String nsDeclName = prefix.length() == 0 ? "xmlns" : "xmlns:" + prefix;
    // The parent chain of a detached subtree ends with null rather than with
    // a Document, so stop on either.
    while (node != null && node.getNodeType() != DOCUMENT_NODE) {
        // Is ns declaration on this element?
        NamedNodeMap namedNodeMap = node.getAttributes();
        // only Element nodes have an attribute map
        if (namedNodeMap != null) {
            Attr attr = (Attr) namedNodeMap.getNamedItem(nsDeclName);
            if (attr != null) {
                return attr.getValue();
            }
        }
        node = node.getParentNode();
    }
    return null;
}
/**
 * Returns a prefix bound to the given namespace URI.
 *
 * <p>
 * The two reserved namespace URIs are answered with "xml" and "xmlns".
 * Other URIs are looked up in the scopes of the traversal first and then in
 * the namespace declarations found on the ancestors of the start node.
 *
 * @param nsUri the namespace URI to look up
 * @return a bound prefix ("" for the default namespace), or null if none is found
 * @throws IllegalArgumentException if {@code nsUri} is null
 */
@Override
public String getPrefix(String nsUri) {
    if (nsUri == null) {
        throw new IllegalArgumentException("DOMStreamReader: getPrefix(String) call with a null namespace URI");
    }
    else if (nsUri.equals("http://www.w3.org/XML/1998/namespace")) {
        return "xml";
    }
    else if (nsUri.equals("http://www.w3.org/2000/xmlns/")) {
        return "xmlns";
    }

    // check scopes
    String prefix = scopes[depth].getPrefix(nsUri);
    if (prefix != null) {
        return prefix;
    }

    // then ancestors above start node
    Node node = findRootElement();
    // The parent chain of a detached subtree ends with null rather than with
    // a Document, so stop on either.
    while (node != null && node.getNodeType() != DOCUMENT_NODE) {
        // Is ns declaration on this element?
        NamedNodeMap namedNodeMap = node.getAttributes();
        // only Element nodes have an attribute map
        if (namedNodeMap != null) {
            for (int i = namedNodeMap.getLength() - 1; i >= 0; i--) {
                Attr attr = (Attr) namedNodeMap.item(i);
                prefix = getPrefixForAttr(attr, nsUri);
                if (prefix != null) {
                    return prefix;
                }
            }
        }
        node = node.getParentNode();
    }
    return null;
}
/**
 * Finds the root element node of the traversal: the start node itself, or
 * its nearest ancestor, that is an Element or a Document.
 *
 * @return that node, or null when the parent chain ends before such a node
 *         is reached (e.g. a detached non-element start node)
 */
private Node findRootElement() {
    Node node = _start;
    while (node != null) {
        final int type = node.getNodeType();
        if (type == DOCUMENT_NODE || type == ELEMENT_NODE) {
            break;
        }
        node = node.getParentNode();
    }
    return node;
}
/**
 * If the given attribute is a namespace declaration for the given namespace URI,
 * return its prefix. Otherwise null.
 *
 * @param attr  attribute to inspect
 * @param nsUri namespace URI that the declaration must bind
 * @return "" for a matching {@code xmlns} declaration, the declared prefix for a
 *         matching {@code xmlns:prefix} declaration, null in every other case
 */
private static String getPrefixForAttr(Attr attr, String nsUri) {
    final String attrName = attr.getNodeName();
    final boolean defaultDecl = attrName.equals("xmlns");
    if (!defaultDecl && !attrName.startsWith("xmlns:")) {
        return null; // not nsdecl
    }
    if (!attr.getValue().equals(nsUri)) {
        return null; // declares some other namespace
    }
    if (defaultDecl) {
        return "";
    }
    String localName = attr.getLocalName();
    // DOM level 1 nodes have no local name. QName.valueOf() would hand back
    // the whole "xmlns:prefix" string, so cut the prefix out of the node name.
    return (localName != null) ? localName : attrName.substring("xmlns:".length());
}
/**
 * Returns an iterator over the prefixes bound to the given namespace URI.
 *
 * <p>
 * This is an incorrect implementation, but AFAIK it's not used in the
 * JAX-WS runtime: at most one prefix, the one found by
 * {@link #getPrefix(String)}, is reported.
 *
 * @param nsUri the namespace URI to look up
 * @return an iterator over zero or one prefix
 * @throws IllegalArgumentException if {@code nsUri} is null
 */
@Override
public Iterator<String> getPrefixes(String nsUri) {
    String prefix = getPrefix(nsUri);
    if (prefix == null) {
        return Collections.<String>emptyList().iterator();
    }
    return Collections.singletonList(prefix).iterator();
}
/**
 * Returns the data of the current processing instruction, or null when the
 * reader is not on PROCESSING_INSTRUCTION.
 */
@Override
public String getPIData() {
    if (_state != PROCESSING_INSTRUCTION) {
        return null;
    }
    final ProcessingInstruction pi = (ProcessingInstruction) _current;
    return pi.getData();
}
/**
 * Returns the target of the current processing instruction, or null when
 * the reader is not on PROCESSING_INSTRUCTION.
 */
@Override
public String getPITarget() {
    if (_state != PROCESSING_INSTRUCTION) {
        return null;
    }
    final ProcessingInstruction pi = (ProcessingInstruction) _current;
    return pi.getTarget();
}
/**
 * Returns the prefix of the current element ("" when the node reports
 * none), or null when the reader is on neither START_ELEMENT nor END_ELEMENT.
 */
@Override
public String getPrefix() {
    switch (_state) {
        case START_ELEMENT:
        case END_ELEMENT:
            return fixNull(_current.getPrefix());
        default:
            return null;
    }
}
/**
 * Always returns null, whatever property name is passed.
 */
@Override
public Object getProperty(String str) throws IllegalArgumentException {
return null;
}
/**
 * Returns the text of the current event.
 *
 * <p>
 * On CHARACTERS this is the cached {@link #wholeText}; on CDATA, COMMENT and
 * ENTITY_REFERENCE it is the node value of the current node.
 *
 * @return the text of the current event
 * @throws IllegalStateException if the reader is in any other state
 */
@Override
public String getText() {
    if (_state == CHARACTERS) {
        return wholeText;
    }
    if (_state == CDATA || _state == COMMENT || _state == ENTITY_REFERENCE) {
        return _current.getNodeValue();
    }
    // the message used to name getTextLength(), a copy/paste slip
    throw new IllegalStateException("DOMStreamReader: getText() called in illegal state");
}
/**
 * Returns the text of the current event as a freshly created character array.
 *
 * @throws IllegalStateException if {@link #getText()} rejects the current state
 */
@Override
public char[] getTextCharacters() {
    final String text = getText();
    return text.toCharArray();
}
/**
 * Copies part of the text of the current event into the given array.
 *
 * @param sourceStart  index of the first text character to copy
 * @param target       destination array
 * @param targetStart  index in {@code target} at which copying starts
 * @param targetLength maximum number of characters to copy
 * @return the number of characters copied: the smaller of {@code targetLength}
 *         and the number of characters available from {@code sourceStart}
 */
@Override
public int getTextCharacters(int sourceStart, char[] target, int targetStart,
        int targetLength) throws XMLStreamException {
    final String text = getText();
    final int available = text.length() - sourceStart;
    final int copied = targetLength < available ? targetLength : available;
    text.getChars(sourceStart, sourceStart + copied, target, targetStart);
    return copied;
}
/**
 * Returns the length of the text of the current event.
 *
 * @throws IllegalStateException if {@link #getText()} rejects the current state
 */
@Override
public int getTextLength() {
    final String text = getText();
    return text.length();
}
/**
 * Returns the offset of the text within the text array, which is always 0.
 *
 * @throws IllegalStateException if the reader is not on CHARACTERS, CDATA,
 *         COMMENT or ENTITY_REFERENCE
 */
@Override
public int getTextStart() {
    switch (_state) {
        case CHARACTERS:
        case CDATA:
        case COMMENT:
        case ENTITY_REFERENCE:
            return 0;
        default:
            throw new IllegalStateException("DOMStreamReader: getTextStart() called in illegal state");
    }
}
/**
 * Always returns null; no XML version is reported by this reader.
 */
@Override
public String getVersion() {
return null;
}
/**
 * Tells whether the current event has a name, i.e. whether the reader is on
 * START_ELEMENT or END_ELEMENT.
 */
@Override
public boolean hasName() {
    switch (_state) {
        case START_ELEMENT:
        case END_ELEMENT:
            return true;
        default:
            return false;
    }
}
/**
 * Returns true until the reader has reached the END_DOCUMENT state.
 */
@Override
public boolean hasNext() throws javax.xml.stream.XMLStreamException {
return (_state != END_DOCUMENT);
}
/**
 * Tells whether the current event carries non-whitespace text.
 *
 * <p>
 * Only CHARACTERS, CDATA, COMMENT and ENTITY_REFERENCE events are considered;
 * text consisting solely of whitespace counts as no text. A null text (the
 * DOM node value of an entity reference is null) is treated as no text
 * instead of causing a NullPointerException.
 *
 * @return true if the current event has text that is not all whitespace
 */
@Override
public boolean hasText() {
    switch (_state) {
        case CHARACTERS:
        case CDATA:
        case COMMENT:
        case ENTITY_REFERENCE: {
            final String text = getText();
            return text != null && text.trim().length() > 0;
        }
        default:
            return false;
    }
}
/**
 * Always returns false; the index is not consulted.
 */
@Override
public boolean isAttributeSpecified(int param) {
return false;
}
/**
 * Returns true if the reader is in the CHARACTERS state.
 */
@Override
public boolean isCharacters() {
return (_state == CHARACTERS);
}
/**
 * Returns true if the reader is in the END_ELEMENT state.
 */
@Override
public boolean isEndElement() {
return (_state == END_ELEMENT);
}
/**
 * Always returns true.
 */
@Override
public boolean isStandalone() {
return true;
}
/**
 * Returns true if the reader is in the START_ELEMENT state.
 */
@Override
public boolean isStartElement() {
return (_state == START_ELEMENT);
}
/**
 * Tells whether the current event is CHARACTERS or CDATA whose text is
 * empty after trimming.
 */
@Override
public boolean isWhiteSpace() {
    if (_state != CHARACTERS && _state != CDATA) {
        return false;
    }
    return getText().trim().isEmpty();
}
/**
 * Translates a DOM node type into the StAX event constant used as reader state.
 *
 * @param nodetype one of the node type constants of {@link Node}
 * @return the corresponding StAX event constant
 * @throws RuntimeException if the node type has no mapping
 */
private static int mapNodeTypeToState(int nodetype) {
    if (nodetype == ELEMENT_NODE) {
        return START_ELEMENT;
    }
    if (nodetype == TEXT_NODE) {
        return CHARACTERS;
    }
    if (nodetype == CDATA_SECTION_NODE) {
        return CDATA;
    }
    if (nodetype == COMMENT_NODE) {
        return COMMENT;
    }
    if (nodetype == PROCESSING_INSTRUCTION_NODE) {
        return PROCESSING_INSTRUCTION;
    }
    if (nodetype == ENTITY_REFERENCE_NODE) {
        return ENTITY_REFERENCE;
    }
    if (nodetype == ENTITY_NODE) {
        return ENTITY_DECLARATION;
    }
    if (nodetype == NOTATION_NODE) {
        return NOTATION_DECLARATION;
    }
    throw new RuntimeException("DOMStreamReader: Unexpected node type");
}
/**
 * Advances to the next event that is worth reporting and returns it.
 *
 * <p>
 * Raw events come from {@link #_next()}. A START_ELEMENT additionally gets
 * its attributes split via {@link #splitAttributes()}. A CHARACTERS event is
 * skipped when the text node directly follows another text node, or when
 * its whole text is empty; otherwise the whole text is cached in
 * {@link #wholeText}.
 */
@Override
public int next() throws XMLStreamException {
    for (;;) {
        final int event = _next();

        if (event == START_ELEMENT) {
            splitAttributes();
            return START_ELEMENT;
        }
        if (event != CHARACTERS) {
            return event;
        }

        // A text node right after another text node is a continuation that
        // must stay invisible.
        final Node previous = _current.getPreviousSibling();
        if (previous != null && previous.getNodeType() == Node.TEXT_NODE) {
            continue;
        }

        wholeText = ((Text) _current).getWholeText();
        if (wholeText.length() == 0) {
            continue; // empty text is not reported either
        }
        return CHARACTERS;
    }
}
/**
 * Performs one raw step of the depth-first traversal: moves {@link #_current}
 * as needed, stores the resulting event in {@link #_state} and returns it.
 * Filtering of text nodes and attribute splitting are left to {@link #next()}.
 *
 * @return the new state
 * @throws IllegalStateException if called in the END_DOCUMENT state
 * @throws RuntimeException if the current state or an encountered node type
 *         is not handled
 */
protected int _next() throws XMLStreamException {
    Node child;
    switch (_state) {
        case END_DOCUMENT:
            throw new IllegalStateException("DOMStreamReader: Calling next() at END_DOCUMENT");
        case START_DOCUMENT:
            // Don't skip document element if this is a fragment
            if (_current.getNodeType() == ELEMENT_NODE) {
                return (_state = START_ELEMENT);
            }
            // otherwise descend to the first child of the start node, if any
            child = _current.getFirstChild();
            if (child == null) {
                return (_state = END_DOCUMENT);
            }
            else {
                _current = child;
                return (_state = mapNodeTypeToState(_current.getNodeType()));
            }
        case START_ELEMENT:
            // descend; a childless element is followed directly by its END_ELEMENT
            child = _current.getFirstChild();
            if (child == null) {
                return (_state = END_ELEMENT);
            }
            else {
                _current = child;
                return (_state = mapNodeTypeToState(_current.getNodeType()));
            }
        case END_ELEMENT:
        case CHARACTERS:
        case COMMENT:
        case CDATA:
        case ENTITY_REFERENCE:
        case PROCESSING_INSTRUCTION:
            // leaving an element: drop back to the scope of its parent
            if (_state == END_ELEMENT) depth--;
            // If at the end of this fragment, then terminate traversal
            if (_current == _start) {
                return (_state = END_DOCUMENT);
            }
            Node sibling = _current.getNextSibling();
            if (sibling == null) {
                // no more siblings: go up and report the end of the parent
                _current = _current.getParentNode();
                // getParentNode() returns null for fragments
                _state = (_current == null || _current.getNodeType() == DOCUMENT_NODE) ?
                    END_DOCUMENT : END_ELEMENT;
                return _state;
            }
            else {
                _current = sibling;
                return (_state = mapNodeTypeToState(_current.getNodeType()));
            }
        case DTD:
        case ATTRIBUTE:
        case NAMESPACE:
        default:
            throw new RuntimeException("DOMStreamReader: Unexpected internal state");
    }
}
/**
 * Advances to the next START_ELEMENT or END_ELEMENT, skipping whitespace-only
 * CHARACTERS / CDATA events as well as SPACE, PROCESSING_INSTRUCTION and
 * COMMENT events.
 *
 * @return START_ELEMENT or END_ELEMENT
 * @throws XMLStreamException if any other event is encountered first
 */
@Override
public int nextTag() throws javax.xml.stream.XMLStreamException {
    int event = next();
    for (;;) {
        final boolean skippable;
        switch (event) {
            case CHARACTERS:
            case CDATA:
                skippable = isWhiteSpace();
                break;
            case SPACE:
            case PROCESSING_INSTRUCTION:
            case COMMENT:
                skippable = true;
                break;
            default:
                skippable = false;
                break;
        }
        if (!skippable) {
            break;
        }
        event = next();
    }

    if (event == START_ELEMENT || event == END_ELEMENT) {
        return event;
    }
    throw new XMLStreamException("DOMStreamReader: Expected start or end tag");
}
/**
 * Verifies that the reader is on the expected event.
 *
 * @param type         required event type
 * @param namespaceURI required namespace URI, or null to skip this check
 * @param localName    required local name, or null to skip this check
 * @throws XMLStreamException if the event type, the namespace URI or the
 *         local name does not match
 */
@Override
public void require(int type, String namespaceURI, String localName)
        throws javax.xml.stream.XMLStreamException {
    if (_state != type) {
        throw new XMLStreamException("DOMStreamReader: Required event type not found");
    }

    if (namespaceURI != null) {
        final String actualUri = getNamespaceURI();
        if (!namespaceURI.equals(actualUri)) {
            throw new XMLStreamException("DOMStreamReader: Required namespaceURI not found");
        }
    }

    if (localName != null) {
        final String actualName = getLocalName();
        if (!localName.equals(actualName)) {
            throw new XMLStreamException("DOMStreamReader: Required localName not found");
        }
    }
}
/**
 * Always returns true.
 */
@Override
public boolean standaloneSet() {
return true;
}
// -- Debugging ------------------------------------------------------
/*
private static void displayDOM(Node node, java.io.OutputStream ostream) {
try {
System.out.println("\n====\n");
XmlUtil.newTransformer().transform(
new DOMSource(node), new StreamResult(ostream));
System.out.println("\n====\n");
}
catch (Exception e) {
e.printStackTrace();
}
}
private static void verifyDOMIntegrity(Node node) {
switch (node.getNodeType()) {
case ELEMENT_NODE:
case ATTRIBUTE_NODE:
// DOM level 1?
if (node.getLocalName() == null) {
System.out.println("WARNING: DOM level 1 node found");
System.out.println(" -> node.getNodeName() = " + node.getNodeName());
System.out.println(" -> node.getNamespaceURI() = " + node.getNamespaceURI());
System.out.println(" -> node.getLocalName() = " + node.getLocalName());
System.out.println(" -> node.getPrefix() = " + node.getPrefix());
}
if (node.getNodeType() == ATTRIBUTE_NODE) return;
NamedNodeMap attrs = node.getAttributes();
for (int i = 0; i < attrs.getLength(); i++) {
verifyDOMIntegrity(attrs.item(i));
}
case DOCUMENT_NODE:
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
verifyDOMIntegrity(children.item(i));
}
}
}
*/
/**
 * Replaces null with the empty string; any other value is returned as is.
 */
private static String fixNull(String s) {
    return (s != null) ? s : "";
}
}