All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.oodt.commons.util.XML Maven / Gradle / Ivy

There is a newer version: 1.9.1
Show newest version
// Licensed to the Apache Software Foundation (ASF) under one or more contributor
// license agreements.  See the NOTICE.txt file distributed with this work for
// additional information regarding copyright ownership.  The ASF licenses this
// file to you under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License.  You may obtain a copy of
// the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
// License for the specific language governing permissions and limitations under
// the License.
//
// Portions of this code come from the Apache XML Project's Xerces 1.0.3 XML Parser
// (specifically, the functions escape and getEntityRef).  Apache license applies:
//
// The Apache Software License, Version 1.1
//
// Copyright (c) 1999 The Apache Software Foundation.  All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are
// permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of
//     conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
//    of conditions and the following disclaimer in the documentation and/or other materials
//    provided with the distribution.
//
// 3. The end-user documentation included with the redistribution, if any, must include
//    the following acknowledgment:
//
//    "This product includes software developed by the Apache Software Foundation (http://www.apache.org/)."
//
//    Alternately, this acknowledgment may appear in the software itself,
//    if and wherever such third-party acknowledgments normally appear.
//
// 4. The names "Xerces" and "Apache Software Foundation" must not be used to endorse or
//    promote products derived from this software without prior written permission. For
//    written permission, please contact apache@apache.org.
//
// 5. Products derived from this software may not be called "Apache", nor may "Apache"
//    appear in their name, without prior written permission of the Apache Software
//    Foundation.
//
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
// ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// $Id: XML.java,v 1.2 2005-05-01 22:49:55 cmattmann Exp $

package org.apache.oodt.commons.util;

import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.logging.Logger;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

/** XML services.
 *
 * This class provides several XML convenience services and encapsulates the underlying
 * XML implementation, allowing it to vary without impacting developers.
 *
 * @author Kelly
 */
public class XML {
  private static Logger LOG = Logger.getLogger(XML.class.getName());
	private static DocumentBuilder getStandardDocumentBuilder() {
		try {
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			factory.setCoalescing(false);
			factory.setExpandEntityReferences(false);
			factory.setIgnoringComments(false);
			factory.setIgnoringElementContentWhitespace(true);
			factory.setNamespaceAware(true);
			factory.setValidating(true);
			return factory.newDocumentBuilder();
		} catch (ParserConfigurationException ex) {
			throw new IllegalStateException("Unexpected ParserConfigurationException: " + ex.getMessage());
		}
	}

	/** Get the DOM implementation.
	 *
	 * @return The DOM implementation.
	 */
	public static DOMImplementation getDOMImplementation() {
		return getStandardDocumentBuilder().getDOMImplementation();
	}

	/** Create a DOM document.
	 *
	 * @return A new DOM document.
	 */
	public static Document createDocument() {
		return getStandardDocumentBuilder().newDocument();
	}

	/** Create a DOM parser.
	 *
	 * This method creates a new DOM parser that has validation turned on and
	 * ignorable whitespace not included, and has a default error handler that prints
	 * error messages and warnings to the standard error stream.
	 *
	 * @return A new DOM parser.
	 */
	public static DOMParser createDOMParser() {
		DocumentBuilder builder = getStandardDocumentBuilder();
		builder.setEntityResolver(ENTERPRISE_ENTITY_RESOLVER);
		builder.setErrorHandler(new ErrorHandler() {
			public void error(SAXParseException ex) {
				System.err.println("Parse error: " + ex.getMessage());
				ex.printStackTrace();
			}
			public void warning(SAXParseException ex) {
				System.err.println("Parse warning: " + ex.getMessage());
			}
			public void fatalError(SAXParseException ex) {
				System.err.println("Fatal parse error: " + ex.getMessage());
				ex.printStackTrace();
			}
		});
		return new DOMParser(builder);
	}

	/** Create a SAX parser.
	 *
	 * This method creates a new, default SAX parser.  It's set up with a default
	 * error handler that just prints messages to the standard error stream.
	 *
	 * @return A new SAX parser.
	 */
	public static SAXParser createSAXParser() {
		try {
			SAXParserFactory factory = SAXParserFactory.newInstance();
			factory.setNamespaceAware(false);
			factory.setValidating(false);
			javax.xml.parsers.SAXParser saxParser = factory.newSAXParser();
			saxParser.getXMLReader().setEntityResolver(ENTERPRISE_ENTITY_RESOLVER);
			saxParser.getXMLReader().setErrorHandler(new ErrorHandler() {
				public void error(SAXParseException ex) {
					System.err.println("Parse error: " + ex.getMessage());
				}
				public void warning(SAXParseException ex) {
					System.err.println("Parse warning: " + ex.getMessage());
				}
				public void fatalError(SAXParseException ex) {
					System.err.println("Fatal parse error: " + ex.getMessage());
				}
			});
		  return new SAXParser(saxParser);
		} catch (ParserConfigurationException ex) {
			throw new IllegalStateException("Unexpected ParserConfigurationException: " + ex.getMessage());
		} catch (SAXException ex) {
			throw new IllegalStateException("Unexpected SAXException: " + ex.getMessage());
		}
	}

	/** Serialize an XML DOM document into a String.
	 *
	 * This method takes a DOM document and pretty-prints (or pretty-serializes, in
	 * XML parlance) it into a string.
	 *
	 * @param doc The document.
	 * @param omitXMLDeclaration True if we should omit the XML declaration, false to keep the XML declaration.
	 * @return The pretty-serialized, stringified, document.
	 */
	public static String serialize(Document doc, boolean omitXMLDeclaration) {
		StringWriter writer = new StringWriter();
	  serialize(doc, writer, omitXMLDeclaration);
	  return writer.getBuffer().toString();
	}

	/** Serialize an XML DOM document into a String.
	 *
	 * This method takes a DOM document and pretty-prints (or pretty-serializes, in
	 * XML parlance) it into a string.
	 *
	 * @param doc The document.
	 * @return The pretty-serialized, stringified, document.
	 */
	public static String serialize(Document doc) {
		return serialize(doc, /*omitXMLDeclaration*/false);
	}

	/** Serialize an XML DOM document into a writer.
	 *
	 * This method takes a DOM document and pretty-prints (or pretty-serializes, in
	 * XML parlance) it into a writer.
	 *
	 * @param doc The document.
	 * @param writer Where to write it.
	 * @param omitXMLDeclaration True if we should omit the XML declaration, false to keep the XML declaration.
	 * @throws IOException If an I/O error occurs.
	 */
	public static void serialize(Document doc, Writer writer, boolean omitXMLDeclaration) {
		try {
			TransformerFactory factory = TransformerFactory.newInstance();
			Transformer transformer = factory.newTransformer();
			
			//update 05/01/2005
			//author: Chris Mattmann
			//set properties to pretty print
			//and also to include the DOCTYPE when serializing the XML
			
			//only set this if it's not null else we get a nice NullPointerException
			if(doc.getDoctype().getPublicId() != null){
				transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, doc.getDoctype().getPublicId());			    
			}

			//only set this if it's not null else we get a nice NullPointerException
			if(doc.getDoctype().getSystemId() != null){
				transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, doc.getDoctype().getSystemId());			    
			}

			transformer.setOutputProperty(OutputKeys.INDENT,"yes");
			
			DOMSource source = new DOMSource(doc);
			StreamResult result = new StreamResult(writer);
			transformer.transform(source, result);
		} catch (TransformerConfigurationException ex) {
			throw new IllegalStateException("Unexpected TransformerConfigurationException: " + ex.getMessage());
		} catch (TransformerException ex) {
			throw new IllegalStateException("Unexpected TransformerException: " + ex.getMessage());
		}
	}

	/** Serialize an XML DOM document into a writer.
	 *
	 * This method takes a DOM document and pretty-prints (or pretty-serializes, in
	 * XML parlance) it into a writer.
	 *
	 * @param doc The document.
	 * @param writer Where to write it.
	 * @throws IOException If an I/O error occurs.
	 */
	public static void serialize(Document doc, Writer writer) {
		serialize(doc, writer, /*omitXMLDeclaration*/false);
	}

	/** Parse the given XML document into a DOM tree.
	 *
	 * @param inputSource The XML document to parse.
	 * @return A DOM tree for the given XML document.
	 * @throws SAXException If a parse error occurs.
	 * @throws IOException If an I/O error occurs.
	 */
	public static Document parse(InputSource inputSource) throws SAXException, IOException {
		DOMParser parser = XML.createDOMParser();
		parser.parse(inputSource);
		return parser.getDocument();
	}

	/** Parse the given XML document into a DOM tree.
	 *
	 * @param string The XML document to parse.
	 * @return A DOM tree for the given XML document.
	 * @throws SAXException If a parse error occurs.
	 */
	public static Document parse(String string) throws SAXException {
		Document doc;
		try {
			DOMParser parser = XML.createDOMParser();
			StringReader reader = new StringReader(string);
			InputSource inputSource = new InputSource(reader);
			doc = parse(inputSource);
			reader.close();
		} catch (IOException cantHappen) {
			cantHappen.printStackTrace();
			throw new RuntimeException("I/O exception " + cantHappen.getClass().getName()
				+ " can NOT have happened, yet it did!  Message: " + cantHappen.getMessage());
		}
		return doc;
	}

	/** Parse the given XML document into a DOM tree.
	 *
	 * @param reader The XML document to parse.
	 * @return A DOM tree for the given XML document.
	 * @throws SAXException If a parse error occurs.
	 * @throws IOException If an I/O error occurs.
	 */
	public static Document parse(Reader reader) throws SAXException, IOException {
		return parse(new InputSource(reader));
	}

	/** Parse the given XML document into a DOM tree.
	 *
	 * @param inputStream The XML document to parse.
	 * @return A DOM tree for the given XML document.
	 * @throws SAXException If a parse error occurs.
	 * @throws IOException If an I/O error occurs.
	 */
	public static Document parse(InputStream inputStream) throws SAXException, IOException {
		return parse(new InputStreamReader(inputStream));
	}

	/** Add a repeating child element with text from the given collection to the given node.
	 *
	 * For example, if values is a {@link java.util.List} with strings items
	 * "a", "b", and "c", and name is "value", then the XML document will
	 * have
	 * 
<value>a</value>
	 * <value>b</value>
	 * <value>c</value>
* appended to node. * * @param node Node to which to add child elements. * @param name Name to give each child element. * @param values Collection of values to assign to each child element, in iterator order. * @throws DOMException If a DOM error occurs. */ public static void add(Node node, String name, Collection values) throws DOMException { for (Object value : values) { add(node, name, value); } } /** Add a child element with the given text to the given element. * * This method modifies your DOM tree so that *
<node>
	 *   ...
	 * </node>
* becomes *
<node>
	 *   ...
	 *   <name>text>/name>
	 * </node>
* * Adding a null name does nothing. Adding null text * won't add the element. * * @param node Node to which to add a child element. * @param name Name of the child element to add to node. * @param text What text the text-node child the child element named name should have. If null, * nothing happens. * @throws DOMException If a DOM error occurs. */ public static void addNonNull(Node node, String name, String text) throws DOMException { if (text == null) { return; } add(node, name, text); } /** Add a child element with the given text to the given element. * * This method modifies your DOM tree so that *
<node>
	 *   ...
	 * </node>
* becomes *
<node>
	 *   ...
	 *   <name>text>/name>
	 * </node>
* * Adding a null name does nothing. Adding null text * results in an empty name tag. * * @param node Node to which to add a child element. * @param name Name of the child element to add to node. * @param text What text the text-node child the child element named name should have. * @throws DOMException If a DOM error occurs. */ public static void add(Node node, String name, String text) throws DOMException { if (name == null) { return; } if (node == null) { throw new IllegalArgumentException("Can't add to a null node"); } Document doc = node.getOwnerDocument(); Element element = doc.createElement(name); if (text != null) { element.appendChild(doc.createTextNode(text)); } node.appendChild(element); } /** Add a child element with the string representation of the given * object to the given node. * * This method modifies your DOM tree so that *
<node>
	 *   ...
	 * </node>
* becomes *
<node>
	 *   ...
	 *   <name>string-rep>/name>
	 * </node>
* * Adding a null name does nothing. Adding null object * results in an empty name tag. * * @param node Node to which to add a child element. * @param name Name of the child element to add to node. * @param object The string representation of the object to have as the text-node * child the child element named name. * @throws DOMException If a DOM error occurs. */ public static void add(Node node, String name, Object object) throws DOMException { add(node, name, object == null? null : object.toString()); } /** Get unwrapped text from the given DOM node * * This method unwraps any wrapped text. For example, if the document contains *
<node>Hello, world.  This is
	 *    my first document.
         * </node>
* then the node's unwrapped text is *
Hello, world.  This is my first document.
* while the {@link #text} method would return the wrapped value *
Hello, world.  This is
	 *    my first document.
* *

In other words, it collects the text nodes under the given node and replaces * strings of newlines and spaces with a single space. Unwrapping a null node * returns a null string. * * @param node The node. * @return The text in its children, unwrapped. */ public static String unwrappedText(Node node) { if (node == null) { return null; } StringBuffer buffer = new StringBuffer(); StringBuilder wrapped = new StringBuilder(text1(node, buffer)); boolean newline = false; for (int i = 0; i < wrapped.length(); ++i) { if (!newline) { if (wrapped.charAt(i) == '\n') { newline = true; wrapped.setCharAt(i, ' '); } } else { if (Character.isWhitespace(wrapped.charAt(i))) { wrapped.deleteCharAt(i); --i; } else { newline = false; } } } return wrapped.toString().trim(); } /** Get the text from the given DOM node. * * Getting text from a null node gives you a null string. * * @param node The node. * @return The text in its children. */ public static String text(Node node) { // [ return text(node) ] StringBuffer buffer = new StringBuffer(); return text1(node, buffer); } /** Dump the structure of the DOM tree rooted at the given node to the given writer. * * This outputs the tree structure including the type of each node, its name, and * its value. Note that for many nodes, the name isn't useful (the name of text * nodes, for example, is #text), and for many nodes, the value is * null. * * @param writer The writer to which write the tree structure. * @param node The tree to output. */ public static void dump(PrintWriter writer, Node node) { dump(writer, node, 0); } /** Remove all comments from the given document node's subtree. * * @param node Node from which to search for comments to nuke. 
*/ public static void removeComments(Node node) { List commentNodes = new ArrayList(); findCommentNodes(commentNodes, node); for (Object commentNode1 : commentNodes) { Node commentNode = (Node) commentNode1; commentNode.getParentNode().removeChild(commentNode); } } /** The resolver for entities for the JPL enterprise. */ public static final EntityResolver ENTERPRISE_ENTITY_RESOLVER = new EnterpriseEntityResolver(); /** An empty XML DOM document. This is handy for some basic operations, and for * fetching the DOM implementation. */ public static final Document EMPTY_DOCUMENT = org.apache.oodt.commons.util.XML.createDocument(); /** Identifies the last printable character in the Unicode range that is supported * by the encoding used with this serializer. For 8-bit encodings this will be either * 0x7E or 0xFF. For 16-bit encodings this will be 0xFFFF. Characters that are not * printable will be escaped using character references. * *

Taken from Xerces 1.0.3. Apache license applies; see source code for * license. * */ private static int LAST_PRINTABLE = 0x7E; /** Escapes a string so it may be printed as text content or attribute value. Non * printable characters are escaped using character references. Where the format * specifies a deault entity reference, that reference is used * (e.g. &lt;). * *

Taken from Xerces 1.0.3. Apache license applies; see source code for * license. * * @param source The string to escape * @return The escaped string */ public static String escape(String source) { StringBuffer result; int i; char ch; String charRef; result = new StringBuffer(source.length()); for (i = 0; i < source.length(); ++i) { ch = source.charAt(i); // If the character is not printable, print as character // reference. Non printables are below ASCII space but not tab or // line terminator, ASCII delete, or above a certain Unicode // threshold. if ((ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r') || ch > LAST_PRINTABLE || ch == 0xF7) { result.append("&#").append(Integer.toString(ch)).append(';'); } else { // If there is a suitable entity reference for this // character, print it. The list of available entity // references is almost but not identical between XML and // HTML. charRef = getEntityRef(ch); if (charRef == null) { result.append(ch); } else { result.append('&').append(charRef).append(';'); } } } return result.toString(); } /** Find all comment nodes under the given node and add them to the given list. * * @param list List to add to. * @param node Node to search. */ private static void findCommentNodes(List list, Node node) { if (node.getNodeType() == Node.COMMENT_NODE) { list.add(node); } else { NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); ++i) { findCommentNodes(list, children.item(i)); } } } /** Get the entity reference for the given character. * * Taken from Xerces 1.0.3; see source code for license. */ private static String getEntityRef(char ch) { // Encode special XML characters into the equivalent character references. // These five are defined by default for all XML documents. switch (ch) { case '<': return "lt"; case '>': return "gt"; case '"': return "quot"; case '\'': return "apos"; case '&': return "amp"; } return null; } /** Get the text from the child node using the given buffer. 
* * @param node The node. * @param buffer The buffer to use. * @return The text. */ private static String text1(Node node, StringBuffer buffer) { for (Node ch = node.getFirstChild(); ch != null; ch = ch.getNextSibling()) { if (ch.getNodeType() == Node.ELEMENT_NODE || ch.getNodeType() == Node.ENTITY_REFERENCE_NODE) { buffer.append(text(ch)); } else if (ch.getNodeType() == Node.TEXT_NODE) { buffer.append(ch.getNodeValue()); } } return buffer.toString(); } /** Dump the structure of the DOM tree rooted at the given node to the given writer, * indenting the contents. * * @param indentAmt The number of spaces to indent the output of this node; * children are indented two more than this amount. * @param writer The writer to which write the tree structure. * @param node The tree to output. */ private static void dump(PrintWriter writer, Node node, int indentAmt) { for (int i = 0; i < indentAmt; ++i) { writer.print(' '); } writer.println(typeOf(node) + "(" + node.getNodeName() + ", " + node.getNodeValue() + ")"); NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); ++i) { dump(writer, children.item(i), indentAmt + 2); } } /** Return a human-readable representation of the type of the given node. * * For example, an attribute node returns Attribute, while an element * node returns Element. * * @param node The node. * @return The name of the node's type. 
*/ private static String typeOf(Node node) { switch (node.getNodeType()) { case Node.ATTRIBUTE_NODE: return "Attribute"; case Node.CDATA_SECTION_NODE: return "CDATA-Section"; case Node.COMMENT_NODE: return "Comment"; case Node.DOCUMENT_FRAGMENT_NODE: return "Document-Fragment"; case Node.DOCUMENT_NODE: return "Document"; case Node.DOCUMENT_TYPE_NODE: return "Document-Type"; case Node.ELEMENT_NODE: return "Element"; case Node.ENTITY_NODE: return "Entity"; case Node.ENTITY_REFERENCE_NODE: return "Entity-Ref"; case Node.NOTATION_NODE: return "Notation"; case Node.PROCESSING_INSTRUCTION_NODE: return "Proc-Instr"; case Node.TEXT_NODE: return "Text"; default: return "Unknown!"; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy