All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.xml.tree.XmlDocumentBuilder Maven / Gradle / Ivy

The newest version!
/*
 * $Id: XmlDocumentBuilder.java,v 1.7 1999/04/16 20:23:28 db Exp $
 * 
 * Copyright (c) 1998-1999 Sun Microsystems, Inc. All Rights Reserved.
 * 
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 * 
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
 * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
 * THIS SOFTWARE OR ITS DERIVATIVES.
 */

package com.sun.xml.tree;


import java.io.IOException;

import java.net.URL;

import java.util.Dictionary;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Vector;

import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.EntityReference;
import org.w3c.dom.DOMException;

import org.xml.sax.AttributeList;
import org.xml.sax.DocumentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.sun.xml.parser.AttributeListEx;
import com.sun.xml.parser.DtdEventListener;
import com.sun.xml.parser.LexicalEventListener;
import com.sun.xml.parser.Parser;
import com.sun.xml.parser.Resolver;


/**
 * This class is a SAX DocumentHandler which converts a stream of parse
 * events into an in-memory DOM document.  After each Parser.parse()
 * invocation returns, a resulting DOM Document may be accessed via the
 * getDocument method.  The parser and its builder should be used
 * together; the builder may be used with only one parser at a time.
 *
 * 

This builder optionally does XML namespace processing, reporting * conformance problems as recoverable errors using the parser's error * handler. If the parser is not a Sun parser, that handler will be * inaccessible and so such errors will always be fatal. Also, if that * handler does not treat such errors as fatal, processing will continue * without raising an exception. * *

To customize the document, a powerful technique involves using * an element factory specifying what element tags (from a given XML * namespace) correspond to what implementation classes. Parse trees * produced by such a builder can have nodes which add behaviors to * achieve application-specific functionality, such as modifing the tree * as it is parsed. * *

The object model here is that XML elements are polymorphic, with * semantic intelligence embedded through customized internal nodes. * Those nodes are created as the parse tree is built. Such trees now * build on the W3C Document Object Model (DOM), and other models may be * supported by the customized nodes. This allows both generic tools * (understanding generic interfaces such as the DOM core) and specialized * tools (supporting specialized behaviors, such as the HTML extensions * to the DOM core; or for XSL elements) to share data structures. * *

Normally only "model" semantics are in document data structures, * but "view" or "controller" semantics can be supported if desired. * *

Elements may choose to intercept certain parsing events directly. * They do this by overriding the default implementations of methods * in the XmlReadable interface. This is normally done to make * the DOM tree represent application level modeling requirements, rather * than matching an XML structure that may not be optimized appropriately. * * @author David Brownell * @version $Revision: 1.7 $ */ public class XmlDocumentBuilder implements LexicalEventListener { // implicit predeclarations of "xml" namespace private static final String xmlURI = "http://www.w3.com/XML/1998/namespace"; // used during parsing private XmlDocument document; private Locator locator; private ParseContextImpl context = new ParseContextImpl (); private Locale locale = Locale.getDefault (); private ElementFactory factory; private Parser parser; private Vector attrTmp = new Vector (); private ParentNode elementStack []; private int topOfStack; private boolean inDTD; private boolean inCDataSection; // parser modes private boolean ignoringLexicalInfo = true; private boolean disableNamespaces = true; /** * Default constructor is for use in conjunction with a SAX * parser's DocumentHandler callback. */ public XmlDocumentBuilder () { } /** * Returns true (the default) if certain lexical information is * automatically discarded when a DOM tree is built, producing * smaller parse trees that are easier to use. */ public boolean isIgnoringLexicalInfo () { return ignoringLexicalInfo; } /** * Controls whether certain lexical information is discarded; by * default, that information is discarded. * *

That information includes whitespace in element content which * is ignorable (note that some nonvalidating XML parsers will not * report that information); all comments; which text is found in * CDATA sections; and boundaries of entity references. * *

"Ignorable whitespace" as reported by parsers is whitespace * used to format XML markup. That is, all whitespace except that in * "mixed" or ANY content models is ignorable. When it is discarded, * pretty-printing may be necessary to make the document be readable * again by humans. * *

Whitespace inside "mixed" and ANY content models needs different * treatment, since it could be part of the document content. In such * cases XML defines a xml:space attribute which applications * should use to determine whether whitespace must be preserved (value * of the attribute is preserve) or whether default behavior * (such as eliminating leading and trailing space, and normalizing * consecutive internal whitespace to a single space) is allowed. * * @param value true indicates that such lexical information should * be discarded during parsing. */ public void setIgnoringLexicalInfo (boolean value) { ignoringLexicalInfo = value; } /** * Returns true if namespace conformance is not checked as the * DOM tree is built. */ public boolean getDisableNamespaces () { return disableNamespaces; } /** * Controls whether namespace conformance is checked during DOM * tree construction, or (the default) not. In this framework, the * DOM Builder is responsible for enforcing all namespace constraints. * When enabled, this makes constructing a DOM tree slightly slower. * (However, at this time it can't enforce the requirement that * parameter entity names not contain colons.) */ public void setDisableNamespaces (boolean value) { disableNamespaces = value; } /** * Sets the parser used by this builder. If this is a Sun parser, * error reports during parsing will use the parser's error handler, * and DTD event processing is replaced. The parser's document * handler is always set to this document builder. */ public void setParser (org.xml.sax.Parser p) { p.setDocumentHandler (this); if (p instanceof Parser) { parser = (Parser) p; parser.setDTDHandler (new DtdListener ()); } else parser = null; } /** * Returns the parser used by this builder, if it is recorded; * only Sun parsers are now recorded. */ public Parser getParser () { return parser; } /** * Returns the fruits of parsing, after a SAX parser has used this * as a document handler during parsing. */ public XmlDocument getDocument () { return document; } /** * Returns the locale to be used for diagnostic messages by * this builder, and by documents it produces. This uses * the locale of any associated parser. */ public Locale getLocale () { if (parser != null) return parser.getLocale (); else return locale; } /** * Assigns the locale to be used for diagnostic messages. * Multi-language applications, such as web servers dealing with * clients from different locales, need the ability to interact * with clients in languages other than the server's default. * *

When an XmlDocument is created, its locale is the default * locale for the virtual machine. If a parser was recorded, * the locale will be associated with that parser. * * @see #chooseLocale */ public void setLocale (Locale locale) throws SAXException { if (locale == null) locale = Locale.getDefault (); if (parser != null) parser.setLocale (locale); this.locale = locale; } /** * Chooses a client locale to use for diagnostics, using the first * language specified in the list that is supported by this builder. * That locale is then automatically assigned using setLocale(). Such a list * could be provided by a variety of user preference mechanisms, * including the HTTP Accept-Language header field. * * @see com.sun.xml.util.MessageCatalog * @see com.sun.xml.parser.Parser#chooseLocale * * @param languages Array of language specifiers, ordered with the most * preferable one at the front. For example, "en-ca" then "fr-ca", * followed by "zh_CN". Both RFC 1766 and Java styles are supported. * @return The chosen locale, or null. */ public Locale chooseLocale (String languages []) throws SAXException { Locale l = XmlDocument.catalog.chooseLocale (languages); if (l != null) setLocale (l); return l; } // Document operations ... /** * SAX DocumentHandler callback, not for general application * use. Reports the locator object which will be used in reporting * diagnostics and interpreting relative URIs in attributes and text. * * @param locator used to identify a location in an XML document * being parsed. */ public void setDocumentLocator (Locator locator) { this.locator = locator; } /** * Returns the document locator provided by the SAX parser. This * is commonly used in diagnostics, and when interpreting relative * URIs found in XML Processing Instructions or other parts of an * XML document. This locator is only valid during document handler * callbacks. */ public Locator getDocumentLocator () { return locator; } /** * This is a factory method, used to create an XmlDocument. * Subclasses may override this method, for example to provide * document classes with particular behaviors, or provide * particular factory behaviours (such as returning elements * that support the HTML DOM methods, if they have the right * name and are in the right namespace). */ public XmlDocument createDocument () { XmlDocument retval = new XmlDocument (); if (factory != null) retval.setElementFactory (factory); return retval; } /** * Assigns the factory to be associated with documents produced * by this builder. */ final public void setElementFactory (ElementFactory factory) { this.factory = factory; } /** * Returns the factory to be associated with documents produced * by this builder. */ final public ElementFactory getElementFactory () { return factory; } /** * SAX DocumentHandler callback, not for general application * use. Reports that the parser is beginning to process a document. */ public void startDocument () throws SAXException { document = createDocument (); if (locator != null) document.setSystemId (locator.getSystemId ()); // // XXX don't want fixed size limits! Fix someday. For // now, wide trees predominate, not deep ones. This is // allowing a _very_ deep tree ... we typically observe // depths on the order of a dozen. // elementStack = new ParentNode [200]; topOfStack = 0; elementStack [topOfStack] = document; inDTD = false; document.startParse (context); } /** * SAX DocumentHandler callback, not for general application * use. Reports that the document has been fully parsed. */ public void endDocument () throws SAXException { if (topOfStack != 0) throw new IllegalStateException (getMessage ("XDB-000")); document.doneParse (context); document.trimToSize (); } private String getNamespaceURI (String prefix) { if ("xml".equals (prefix)) return xmlURI; else if ("xmlns".equals (prefix)) return null; else return elementStack [topOfStack] .getInheritedAttribute ("xmlns:" + prefix); } /** * SAX DocumentHandler callback, not for general application * use. Reports that the parser started to parse a new element, * with the given tag and attributes, and call its startParse * method. * * @exception SAXParseException if XML namespace support is enabled * and the tag or any attribute name contain more than one colon. * @exception SAXException as appropriate, such as if a faulty parser * provides an element or attribute name which is illegal. */ public void startElement (String tag, AttributeList attributes) throws SAXException { AttributeSet attrs = null; ElementNode e = null; int length; // // Convert set of attributes to DOM representation. // length = (attributes == null) ? 0 : attributes.getLength (); if (length != 0) { try { if (!disableNamespaces) { // // ID, IDREF(S), ENTITY(IES), NOTATION must be free of // colons in value ... only CDATA and NMTOKEN(S) excepted. // for (int i = 0; i < length; i++) { String type = attributes.getType (i); if ("CDATA".equals (type) || type.startsWith ("NMTOKEN")) continue; if (attributes.getValue (i).indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-001", new Object [] { attributes.getName (i) })), locator)); } } attrs = new AttributeSet (attributes); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-002", new Object[] { ex.getMessage () })), locator, ex)); } } // // Then create the element, associate its attributes, and // stack it for later addition. // try { if (disableNamespaces) e = (ElementNode) document.createElementEx (tag); else { int index = tag.indexOf (':'); String attribute = "xmlns"; String namespace = ""; String localPart = tag; if (index != -1) { attribute = "xmlns:" + tag.substring (0, index); localPart = tag.substring (index + 1); if (tag.lastIndexOf (':') != index) error (new SAXParseException ((getMessage ("XDB-003", new Object [] { tag })), locator)); } // Note: empty namespace URIs are ignored; the // namespace spec doesn't say what they should do. if (attrs != null) namespace = attrs.getValue (attribute); if ("".equals (namespace)) namespace = elementStack [topOfStack] .getInheritedAttribute (attribute); e = (ElementNode) document.createElementEx (namespace, localPart); // remember whatever prefix it came with if (localPart != tag) e.setTag (tag); } } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } if (attributes != null && attributes instanceof AttributeListEx) e.setIdAttributeName ( ((AttributeListEx)attributes).getIdAttributeName ()); if (length != 0) e.setAttributes (attrs); elementStack [topOfStack++].appendChild (e); elementStack [topOfStack] = e; e.startParse (context); // // Division of responsibility for namespace processing is (being // revised so) that the DOM builder reports errors when namespace // constraints are violated, and the parser is ignorant of them. // if (!disableNamespaces) { int index; String prefix; // Element prefix must be declared index = tag.indexOf (':'); if (index > 0) { prefix = tag.substring (0, index); if (getNamespaceURI (prefix) == null) error (new SAXParseException ((getMessage ("XDB-005", new Object [] { prefix })), locator)); } // Attribute prefixes must be declared, and only one instance // of a given attribute (scope + local part) may appear if (length != 0) { // invariant: attrTmp empty except in this block for (int i = 0; i < length; i++) { String name = attrs.item (i).getNodeName (); index = name.indexOf (':'); if (index > 0) { String uri; prefix = name.substring (0, index); // "xmlns" is like a keyword if ("xmlns".equals (prefix)) continue; uri = getNamespaceURI (prefix); if (uri == null) { error (new SAXParseException ((getMessage ("XDB-006", new Object [] { prefix })), locator)); continue; } if (name.lastIndexOf (':') != index) error (new SAXParseException ((getMessage ("XDB-007", new Object [] { name })), locator)); // Unicode ffff -- illegal in URIs and XML names; // the value is otherwise irrelevant name = name.substring (index + 1); name = uri + '\uffff' + name; if (attrTmp.contains (name)) // duplicating attributes is a well-formedness // error, but we don't interpret violations of // namespace conformance as fatal errors if // we have a chance. error (new SAXParseException ((getMessage ("XDB-008", new Object [] { attrs.item (i).getNodeName () })), locator)); else attrTmp.addElement (name); } } attrTmp.setSize (0); } } } /** * SAX DocumentHandler callback, not for general application * use. Reports that the parser finished the current element. * The element's doneParse method is then called. * * @exception SAXException as appropriate */ public void endElement (String tag) throws SAXException { ElementNode e = (ElementNode) elementStack [topOfStack]; elementStack [topOfStack--] = null; // Trusting that the SAX parser is correct, and hasn't // mismatched start/end element callbacks. // if (!tag.equals (e.getTagName ())) // fatal (new SAXParseException ((getMessage ("XDB-009", new // Object[] { tag, e.getTagName () })), locator)); try { e.doneParse (context); e.reduceWaste (); // use less space elementStack [topOfStack].doneChild (e, context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * LexicalEventListener callback, not for general application * use. Reports that a comment was found in the document. * * If this builder is set to record lexical information (by default * it ignores such information) then this callback records a comment * in the DOM tree. * @param text body of the comment. */ public void comment (String text) throws SAXException { // Ignore comments if lexical info is to be ignored, // or if parsing the DTD if (ignoringLexicalInfo || inDTD) return; Comment comment = document.createComment (text); ParentNode top = elementStack [topOfStack]; try { top.appendChild (comment); top.doneChild ((NodeEx) comment, context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * LexicalEventListener callback, not for general application * use. Reports that CDATA section was begun. * *

If this builder is set to record lexical information (by default * it ignores such information) then this callback arranges that * character data (and ignorable whitespace) be recorded as part of * a CDATA section, until the matching endCDATA method is * called. */ public void startCDATA () throws SAXException { if (ignoringLexicalInfo) return; CDATASection text = document.createCDATASection (""); ParentNode top = elementStack [topOfStack]; try { inCDataSection = true; top.appendChild (text); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * LexicalEventListener callback, not for general application * use. Reports that CDATA section was completed. * This terminates any CDATA section that is being constructed. */ public void endCDATA () throws SAXException { if (!inCDataSection) return; ParentNode top = elementStack [topOfStack]; try { inCDataSection = false; top.doneChild ((NodeEx) top.getLastChild (), context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * SAX DocumentHandler callback, not for general application * use. Reports text which is part of the document, and which will * be provided stored as a Text node. * *

Some parsers report "ignorable" whitespace through this interface, * which can cause portability problems. That's because there is no safe * way to discard it from a parse tree without accessing DTD information, * of a type which DOM doesn't expose and most applications won't want * to deal with. Avoid using such parsers. * * @param buf holds text characters * @param offset initial index of characters in buf * @param len how many characters are being passed * @exception SAXException as appropriate */ public void characters (char buf [], int offset, int len) throws SAXException { ParentNode top = elementStack [topOfStack]; if (inCDataSection) { String temp = new String (buf, offset, len); CDATASection section; section = (CDATASection) top.getLastChild (); section.appendData (temp); return; } try { NodeBase lastChild = (NodeBase) top.getLastChild (); if (lastChild instanceof TextNode) { String tmp = new String (buf, offset, len); ((TextNode)lastChild).appendData (tmp); } else { TextNode text = document.newText (buf, offset, len); top.appendChild (text); top.doneChild (text, context); } } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * SAX DocumentHandler callback, not for general application * use. Reports ignorable whitespace; if lexical information is * not ignored (by default, it is ignored) the whitespace reported * here is recorded in a DOM text (or CDATA, as appropriate) node. * * @param buf holds text characters * @param offset initial index of characters in buf * @param len how many characters are being passed * @exception SAXException as appropriate */ public void ignorableWhitespace (char buf [], int offset, int len) throws SAXException { if (ignoringLexicalInfo) return; ParentNode top = elementStack [topOfStack]; if (inCDataSection) { String temp = new String (buf, offset, len); CDATASection section; section = (CDATASection) top.getLastChild (); section.appendData (temp); return; } TextNode text = document.newText (buf, offset, len); try { top.appendChild (text); top.doneChild (text, context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * SAX DocumentHandler callback, not for general application * use. Reports that a processing instruction was found. * *

Some applications may want to intercept processing instructions * by overriding this method as one way to make such instructions * take immediate effect during parsing, or to ensure that * processing instructions in DTDs aren't ignored. * * @param name the processor to which the instruction is directed * @param instruction the text of the instruction (no leading spaces) * @exception SAXParseException if XML namespace support is enabled * and the name contains a colon. * @exception SAXException as appropriate */ public void processingInstruction (String name, String instruction) throws SAXException { if (!disableNamespaces && name.indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-010")), locator)); // Ignore PIs in DTD for DOM support if (inDTD) return; ParentNode top = elementStack [topOfStack]; PINode pi; try { pi = (PINode) document.createProcessingInstruction (name, instruction); top.appendChild (pi); top.doneChild (pi, context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } /** * LexicalEventListener callback, not for general application * use. Reports the begining of processing for a general entity. * *

If this builder is set to record lexical information (by default * it ignores such information) then this callback arranges that * an entity reference node hold data that is reported until the * matching endParsedEntity callback. Otherwise that data * is treated like any other content found in a document (and will * not be marked as readonly). * * @param name identifies the parsed general entity whose * expansion will be represented in the DOM tree. */ public void startParsedEntity (String name) throws SAXException { if (ignoringLexicalInfo) return; EntityReference e = document.createEntityReference (name); elementStack [topOfStack++].appendChild (e); elementStack [topOfStack] = (ParentNode) e; } /** * LexicalEventListener callback, not for general application * use. Reports that the parser finished handling a general * entity. If an entity reference was being recorded, this * callback marks the entity being expanded as read only. * * @param name identifies the parsed general entity whose * expansion will be represented in the DOM tree. * @param included lets nonvalidating XML parser tell applications * about any external entities that were recognized but not included. */ public void endParsedEntity (String name, boolean included) throws SAXException { ParentNode entity = elementStack [topOfStack]; if (!(entity instanceof EntityReference)) return; entity.setReadonly (true); elementStack [topOfStack--] = null; if (!name.equals (entity.getNodeName ())) fatal (new SAXParseException ((getMessage ("XDB-011", new Object[] { name, entity.getNodeName () })), locator)); try { elementStack [topOfStack].doneChild (entity, context); } catch (DOMException ex) { fatal (new SAXParseException ((getMessage ("XDB-004", new Object [] { ex.getMessage () })), locator, ex)); } } // mostly for namespace errors private void error (SAXParseException err) throws SAXException { if (parser != null) parser.getErrorHandler ().error (err); else throw err; } private void fatal (SAXParseException err) throws SAXException { if (parser != null) parser.getErrorHandler ().fatalError (err); throw err; } class ParseContextImpl implements ParseContext { public ErrorHandler getErrorHandler () { return (parser != null) ? parser.getErrorHandler () : null; } public Locale getLocale () { return XmlDocumentBuilder.this.getLocale (); } public Locator getLocator () { return locator; } } // // We really want to be able to use this ... not only does it // build the DOM DocumentType object, but it also does many // of the namespace conformance tests that SAX alone can't // support. // class DtdListener implements DtdEventListener { private Doctype doctype; private String publicId; private String systemId; private String internalSubset; public void startDtd (String root) { doctype = document.createDoctype (root); XmlDocumentBuilder.this.inDTD = true; } public void externalDtdDecl (String p, String s) throws SAXException { publicId = p; systemId = s; } public void internalDtdDecl (String s) throws SAXException { internalSubset = s; } public void externalEntityDecl (String n, String p, String s) throws SAXException { if (!disableNamespaces && n.indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-012")), locator)); doctype.addEntityNode (n, p, s, null); } public void internalEntityDecl (String n, String v) throws SAXException { if (!disableNamespaces && n.indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-012")), locator)); doctype.addEntityNode (n, v); } public void notationDecl (String n, String p, String s) throws SAXException { if (!disableNamespaces && n.indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-013")), locator)); doctype.addNotation (n, p, s); } public void unparsedEntityDecl (String n, String p, String s, String t) throws SAXException { if (!disableNamespaces && n.indexOf (':') != -1) error (new SAXParseException ((getMessage ("XDB-012")), locator)); doctype.addEntityNode (n, p, s, t); } public void elementDecl (String elementName, String contentModel) throws SAXException { // ignored } public void attributeDecl ( String elementName, String attributeName, String attributeType, String options [], String defaultValue, boolean isFixed, boolean isRequired ) throws SAXException { // ignored } public void endDtd () { doctype.setPrintInfo (publicId, systemId, internalSubset); document.appendChild (doctype); XmlDocumentBuilder.this.inDTD = false; } } /* * Gets the messages from the resource bundles for the given messageId. */ String getMessage (String messageId) { return getMessage (messageId, null); } /* * Gets the messages from the resource bundles for the given messageId * after formatting it with the parameters passed to it. */ String getMessage (String messageId, Object[] parameters) { if (locale == null) { getLocale (); } return XmlDocument.catalog.getMessage (locale, messageId, parameters); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy