All downloads are free. The search and download functionality uses the official Maven repository.

org.opencms.xml.CmsXmlUtils Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 17.0
Show newest version
/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.xml;

import org.opencms.file.CmsResource;
import org.opencms.main.CmsLog;
import org.opencms.util.CmsStringUtil;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;

import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.logging.Log;
import org.apache.xerces.parsers.SAXParser;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Provides some basic XML handling utilities.

* * @since 6.0.0 */ public final class CmsXmlUtils { /** * This class is only used to expose the XML parser configuration implementation name.

*/ private static class ParserImpl extends SAXParser { /** * Constructor.

*/ ParserImpl() { super(); } /** * Returns the implementation name of the used XML parser configuration.

* * @return the implementation name */ String getConfigImplName() { if (fConfiguration != null) { return fConfiguration.getClass().getName(); } else { return null; } } } /** The log object for this class. */ private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class); /** Key of the SAX parser configuration system property. */ private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration"; /** Key of the SAX parser factory system property. */ private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory"; /** Key of the XML reader system property. */ private static final String XML_READER_KEY = "org.xml.sax.driver"; /** * Prevents instances of this class from being generated.

*/ private CmsXmlUtils() { // noop } /** * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.

* * Use this method if it's uncertain if the given arguments are starting or ending with * a slash "/".

* * Examples:
* "title", "subtitle" becomes title/subtitle
* "title[1]/", "subtitle" becomes title[1]/subtitle
* "title[1]/", "/subtitle[1]" becomes title[1]/subtitle[1]

* * @param prefix the prefix Xpath * @param suffix the suffix Xpath * * @return the concatenated Xpath build from prefix and suffix */ public static String concatXpath(String prefix, String suffix) { if (suffix == null) { // ensure suffix is not null suffix = ""; } else { if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) { // remove leading '/' form suffix suffix = suffix.substring(1); } } if (prefix != null) { StringBuffer result = new StringBuffer(32); result.append(prefix); if (!CmsResource.isFolder(prefix)) { result.append('/'); } result.append(suffix); return result.toString(); } return suffix; } /** * Translates a simple lookup path to the simplified Xpath format used for * the internal bookmarks.

* * Examples:
* title becomes title[1]
* title[1] is left untouched
* title/subtitle becomes title[1]/subtitle[1]
* title/subtitle[1] becomes title[1]/subtitle[1]

* * Note: If the name already has the format title[1] then provided index parameter * is ignored.

* * @param path the path to get the simplified Xpath for * @param index the index to append (if required) * * @return the simplified Xpath for the given name */ public static String createXpath(String path, int index) { if (path.indexOf('/') > -1) { // this is a complex path over more then 1 node StringBuffer result = new StringBuffer(path.length() + 32); // split the path into sub elements List elements = CmsStringUtil.splitAsList(path, '/'); int end = elements.size() - 1; for (int i = 0; i <= end; i++) { // append [i] to path element if required result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1)); if (i < end) { // append path delimiter if not final path element result.append('/'); } } return result.toString(); } // this path has only 1 node, append [index] if required return createXpathElementCheck(path, index); } /** * Appends the provided index parameter in square brackets to the given name, * like path[index].

* * This method is used if it's clear that some path does not have * a square bracket already appended.

* * @param path the path append the index to * @param index the index to append * * @return the simplified Xpath for the given name */ public static String createXpathElement(String path, int index) { StringBuffer result = new StringBuffer(path.length() + 5); result.append(path); result.append('['); result.append(index); result.append(']'); return result.toString(); } /** * Ensures that a provided simplified Xpath has the format title[1].

* * This method is used if it's uncertain if some path does have * a square bracket already appended or not.

* * Note: If the name already has the format title[1], then provided index parameter * is ignored.

* * @param path the path to get the simplified Xpath for * @param index the index to append (if required) * * @return the simplified Xpath for the given name */ public static String createXpathElementCheck(String path, int index) { if (path.charAt(path.length() - 1) == ']') { // path is already in the form "title[1]" // ignore provided index and return the path "as is" return path; } // append index in square brackets return createXpathElement(path, index); } /** * Returns the first Xpath element from the provided path, * without the index value.

* * Examples:
* title is left untouched
* title[1] becomes title
* title/subtitle becomes title
* title[1]/subtitle[1] becomes title

* * @param path the path to get the first Xpath element from * * @return the first Xpath element from the provided path */ public static String getFirstXpathElement(String path) { int pos = path.indexOf('/'); if (pos >= 0) { path = path.substring(0, pos); } return CmsXmlUtils.removeXpathIndex(path); } /** * Returns the last Xpath element from the provided path, * without the index value.

* * Examples:
* title is left untouched
* title[1] becomes title
* title/subtitle becomes subtitle
* title[1]/subtitle[1] becomes subtitle

* * @param path the path to get the last Xpath element from * * @return the last Xpath element from the provided path */ public static String getLastXpathElement(String path) { int pos = path.lastIndexOf('/'); if (pos >= 0) { path = path.substring(pos + 1); } return CmsXmlUtils.removeXpathIndex(path); } /** * Returns the last Xpath index from the given path.

* * Examples:
* title returns the empty String

* title[1] returns [1]

* title/subtitle returns them empty String

* title[1]/subtitle[1] returns [1]

* * @param path the path to extract the Xpath index from * * @return the last Xpath index from the given path */ public static String getXpathIndex(String path) { int pos1 = path.lastIndexOf('/'); int pos2 = path.lastIndexOf('['); if ((pos2 < 0) || (pos1 > pos2)) { return ""; } return path.substring(pos2); } /** * Returns the last Xpath index from the given path as integer.

* * Examples:
* title returns 1

* title[1] returns 1

* title/subtitle returns 1

* title[1]/subtitle[2] returns 2

* * @param path the path to extract the Xpath index from * * @return the last Xpath index from the given path as integer */ public static int getXpathIndexInt(String path) { int pos1 = path.lastIndexOf('/'); int pos2 = path.lastIndexOf('['); if ((pos2 < 0) || (pos1 > pos2)) { return 1; } String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']')); try { return Integer.parseInt(idxStr); } catch (NumberFormatException e) { // NOOP } return 1; } /** * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.

* This is done for performance improvements only.

*/ public static void initSystemProperties() { String implName; // initialize system properties if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) { implName = SAXParserFactory.newInstance().getClass().getName(); LOG.info("Setting sax parser factory impl property to " + implName); System.setProperty(SAX_PARSER_FACTORY_KEY, implName); } if (System.getProperty(XML_READER_KEY) == null) { SAXReader reader = new SAXReader(); try { implName = reader.getXMLReader().getClass().getName(); LOG.info("Setting xml reader impl property to " + implName); System.setProperty(XML_READER_KEY, implName); } catch (SAXException e) { LOG.error("Error evaluating XMLReader impl.", e); } } if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) { ParserImpl saxParser = new ParserImpl(); implName = saxParser.getConfigImplName(); if (implName != null) { LOG.info("Setting xml parser configuration impl property to " + implName); System.setProperty(SAX_PARSER_CONFIG_KEY, implName); } } } /** * Returns true if the given path is a Xpath with * at least 2 elements.

* * Examples:
* title returns false
* title[1] returns false
* title/subtitle returns true
* title[1]/subtitle[1] returns true

* * @param path the path to check * @return true if the given path is a Xpath with at least 2 elements */ public static boolean isDeepXpath(String path) { return path.indexOf('/') > 0; } /** * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.

* * @param document the XML document to marshal * @param out the output stream to write to * @param encoding the encoding to use * @return the output stream with the xml content * @throws CmsXmlException if something goes wrong */ public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException { try { OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding(encoding); XMLWriter writer = new XMLWriter(out, format); writer.setEscapeText(false); writer.write(document); writer.close(); } catch (Exception e) { throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); } return out; } /** * Marshals (writes) an XML document to a String using XML pretty-print formatting.

* * @param document the XML document to marshal * @param encoding the encoding to use * @return the marshalled XML document * @throws CmsXmlException if something goes wrong */ public static String marshal(Document document, String encoding) throws CmsXmlException { ByteArrayOutputStream out = new ByteArrayOutputStream(); marshal(document, out, encoding); try { return out.toString(encoding); } catch (UnsupportedEncodingException e) { throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e); } } /** * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.

* * @param node the XML node to marshal * @param encoding the encoding to use * * @return the string with the xml content * * @throws CmsXmlException if something goes wrong */ public static String marshal(Node node, String encoding) throws CmsXmlException { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding(encoding); format.setSuppressDeclaration(true); XMLWriter writer = new XMLWriter(out, format); writer.setEscapeText(false); writer.write(node); writer.close(); } catch (Exception e) { throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); } return new String(out.toByteArray()); } /** * Removes all Xpath indices from the given path.

* * Example:
* title is left untouched
* title[1] becomes title
* title/subtitle is left untouched
* title[1]/subtitle[1] becomes title/subtitle

* * @param path the path to remove the Xpath index from * * @return the path with all Xpath indices removed */ public static String removeAllXpathIndices(String path) { return path.replaceAll("\\[[0-9]+\\]", ""); } /** * Removes the first Xpath element from the path.

* * If the provided path does not contain a "/" character, * it is returned unchanged.

* *

Examples:
* title is left untouched
* title[1] is left untouched
* title/subtitle becomes subtitle
* title[1]/subtitle[1] becomes subtitle[1]

* * @param path the Xpath to remove the first element from * * @return the path with the first element removed */ public static String removeFirstXpathElement(String path) { int pos = path.indexOf('/'); if (pos < 0) { return path; } return path.substring(pos + 1); } /** * Removes the last complex Xpath element from the path.

* * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths. * *

Example:
* system/backup[@date='23/10/2003']/resource[path='/a/b/c'] becomes system/backup[@date='23/10/2003']

* * @param path the Xpath to remove the last element from * * @return the path with the last element removed */ public static String removeLastComplexXpathElement(String path) { int pos = path.lastIndexOf('/'); if (pos < 0) { return path; } // count ' chars int p = pos; int count = -1; while (p > 0) { count++; p = path.indexOf("\'", p + 1); } String parentPath = path.substring(0, pos); if ((count % 2) == 0) { // if substring is complete return parentPath; } // if not complete p = parentPath.lastIndexOf("'"); if (p >= 0) { // complete it if possible return removeLastComplexXpathElement(parentPath.substring(0, p)); } return parentPath; } /** * Removes the last Xpath element from the path.

* * If the provided path does not contain a "/" character, * it is returned unchanged.

* *

Examples:
* title is left untouched
* title[1] is left untouched
* title/subtitle becomes title
* title[1]/subtitle[1] becomes title[1]

* * @param path the Xpath to remove the last element from * * @return the path with the last element removed */ public static String removeLastXpathElement(String path) { int pos = path.lastIndexOf('/'); if (pos < 0) { return path; } return path.substring(0, pos); } /** * Removes all Xpath index information from the given input path.

* * Examples:
* title is left untouched
* title[1] becomes title
* title/subtitle is left untouched
* title[1]/subtitle[1] becomes title/subtitle

* * @param path the path to remove the Xpath index information from * * @return the simplified Xpath for the given name */ public static String removeXpath(String path) { if (path.indexOf('/') > -1) { // this is a complex path over more then 1 node StringBuffer result = new StringBuffer(path.length() + 32); // split the path into sub-elements List elements = CmsStringUtil.splitAsList(path, '/'); int end = elements.size() - 1; for (int i = 0; i <= end; i++) { // remove [i] from path element if required result.append(removeXpathIndex(elements.get(i))); if (i < end) { // append path delimiter if not final path element result.append('/'); } } return result.toString(); } // this path has only 1 node, remove last index if required return removeXpathIndex(path); } /** * Removes the last Xpath index from the given path.

* * Examples:
* title is left untouched
* title[1] becomes title
* title/subtitle is left untouched
* title[1]/subtitle[1] becomes title[1]/subtitle

* * @param path the path to remove the Xpath index from * * @return the path with the last Xpath index removed */ public static String removeXpathIndex(String path) { int pos1 = path.lastIndexOf('/'); int pos2 = path.lastIndexOf('['); if ((pos2 < 0) || (pos1 > pos2)) { return path; } return path.substring(0, pos2); } /** * Simplifies an Xpath by removing a leading and a trailing slash from the given path.

* * Examples:
* title/ becomes title
* /title[1]/ becomes title[1]
* /title/subtitle/ becomes title/subtitle
* /title/subtitle[1]/ becomes title/subtitle[1]

* * @param path the path to process * @return the input with a leading and a trailing slash removed */ public static String simplifyXpath(String path) { StringBuffer result = new StringBuffer(path); if (result.charAt(0) == '/') { result.deleteCharAt(0); } int pos = result.length() - 1; if (result.charAt(pos) == '/') { result.deleteCharAt(pos); } return result.toString(); } /** * Helper to unmarshal (read) xml contents from a byte array into a document.

* * Using this method ensures that the OpenCms XML entity resolver is used.

* * @param xmlData the XML data in a byte array * @param resolver the XML entity resolver to use * * @return the base object initialized with the unmarshalled XML document * * @throws CmsXmlException if something goes wrong * * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) */ public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver); } /** * Helper to unmarshal (read) xml contents from a byte array into a document.

* * Using this method ensures that the OpenCms XML entity resolver is used.

* * @param xmlData the XML data in a byte array * @param resolver the XML entity resolver to use * @param validate if the reader should try to validate the xml code * * @return the base object initialized with the unmarshalled XML document * * @throws CmsXmlException if something goes wrong * * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) */ public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate) throws CmsXmlException { return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate); } /** * Helper to unmarshal (read) xml contents from an input source into a document.

* * Using this method ensures that the OpenCms XML entity resolver is used.

* * Important: The encoding provided will NOT be used during unmarshalling, * the XML parser will do this on the base of the information in the source String. * The encoding is used for initializing the created instance of the document, * which means it will be used when marshalling the document again later.

* * @param source the XML input source to use * @param resolver the XML entity resolver to use * * @return the unmarshalled XML document * * @throws CmsXmlException if something goes wrong */ public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException { return unmarshalHelper(source, resolver, false); } /** * Helper to unmarshal (read) xml contents from an input source into a document.

* * Using this method ensures that the OpenCms XML entity resolver is used.

* * Important: The encoding provided will NOT be used during unmarshalling, * the XML parser will do this on the base of the information in the source String. * The encoding is used for initializing the created instance of the document, * which means it will be used when marshalling the document again later.

* * @param source the XML input source to use * @param resolver the XML entity resolver to use * @param validate if the reader should try to validate the xml code * * @return the unmarshalled XML document * * @throws CmsXmlException if something goes wrong */ public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate) throws CmsXmlException { if (null == source) { throw new CmsXmlException( Messages.get().container( Messages.ERR_UNMARSHALLING_XML_DOC_1,"source==null!")); } try { SAXReader reader = new SAXReader(); if (resolver != null) { reader.setEntityResolver(resolver); } reader.setMergeAdjacentText(true); reader.setStripWhitespaceText(true); if (!validate) { reader.setValidation(false); reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } return reader.read(source); } catch (DocumentException e) { throw new CmsXmlException( Messages.get().container( Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + source.getSystemId() + ")"), e); } catch (SAXException e) { throw new CmsXmlException( Messages.get().container( Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + source.getSystemId() + ")"), e); } } /** * Helper to unmarshal (read) xml contents from a String into a document.

* * Using this method ensures that the OpenCms XML entitiy resolver is used.

* * @param xmlData the xml data in a String * @param resolver the XML entity resolver to use * @return the base object initialized with the unmarshalled XML document * @throws CmsXmlException if something goes wrong * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) */ public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException { return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver); } /** * Validates the structure of a XML document contained in a byte array * with the DTD or XML schema used by the document.

* * @param xmlData a byte array containing a XML document that should be validated * @param resolver the XML entity resolver to use * * @throws CmsXmlException if the validation fails */ public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { validateXmlStructure(new ByteArrayInputStream(xmlData), resolver); } /** * Validates the structure of a XML document with the DTD or XML schema used * by the document.

* * @param document a XML document that should be validated * @param encoding the encoding to use when marshalling the XML document (required) * @param resolver the XML entity resolver to use * * @throws CmsXmlException if the validation fails */ public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver) throws CmsXmlException { // generate bytes from document byte[] xmlData = ((ByteArrayOutputStream)marshal( document, new ByteArrayOutputStream(512), encoding)).toByteArray(); validateXmlStructure(xmlData, resolver); } /** * Validates the structure of a XML document contained in a byte array * with the DTD or XML schema used by the document.

* * @param xmlStream a source providing a XML document that should be validated * @param resolver the XML entity resolver to use * * @throws CmsXmlException if the validation fails */ public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException { XMLReader reader; try { reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); } catch (SAXException e) { // xerces parser not available - no schema validation possible if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e); } // no validation of the content is possible return; } // turn on validation try { reader.setFeature("http://xml.org/sax/features/validation", true); // turn on schema validation reader.setFeature("http://apache.org/xml/features/validation/schema", true); // configure namespace support reader.setFeature("http://xml.org/sax/features/namespaces", true); reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); } catch (SAXNotRecognizedException e) { // should not happen as Xerces 2 support this feature if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e); } // no validation of the content is possible return; } catch (SAXNotSupportedException e) { // should not happen as Xerces 2 support this feature if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e); } // no validation of the content is possible return; } // add an error handler which turns any errors into XML CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler(); reader.setErrorHandler(errorHandler); if (resolver != null) { // set the resolver for the "opencms://" URIs reader.setEntityResolver(resolver); } try { reader.parse(new InputSource(xmlStream)); } catch (IOException e) { // should not happen since we read form a byte 
array if (LOG.isErrorEnabled()) { LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e); } return; } catch (SAXException e) { // should not happen since all errors are handled in the XML error handler if (LOG.isErrorEnabled()) { LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e); } return; } if (errorHandler.getErrors().elements().size() > 0) { // there was at last one validation error, so throw an exception StringWriter out = new StringWriter(256); OutputFormat format = OutputFormat.createPrettyPrint(); XMLWriter writer = new XMLWriter(out, format); try { writer.write(errorHandler.getErrors()); writer.write(errorHandler.getWarnings()); writer.close(); } catch (IOException e) { // should not happen since we write to a StringWriter if (LOG.isErrorEnabled()) { LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e); } } // generate String from XML for display of document in error message throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString())); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy