// Source listing of org.opencms.util.CmsXmlSaxWriter from the opencms-core artifact.
/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH & Co. KG, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.util;
import org.opencms.i18n.CmsEncoder;
import org.opencms.main.OpenCms;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Simple SAX event handler that generates a XML (or HTML) file from the events caught.
 *
 * This can be used for writing large XML files where keeping a DOM structure
 * in memory might cause out-of-memory issues, like e.g. when writing the
 * OpenCms export files.
 *
 * It can also be used if a {@link org.xml.sax.ContentHandler} is needed that should
 * generate a XML / HTML file from a series of SAX events.
 *
 * Element start tags are kept "open" until the next event arrives, so that elements
 * without content can be written in the short form {@code <name/>}.
 *
 * @since 6.0.0
 */
public class CmsXmlSaxWriter extends DefaultHandler implements LexicalHandler {

    /** The indentation to use. */
    private static final String INDENT_STR = "\t";

    /** The file encoding to use. */
    private String m_encoding;

    /**
     * Indicates if characters that are not part of the selected encoding
     * are to be replaced with the XML {@code &#123;} entity representation
     * in the generated output (not in CDATA elements).
     */
    private boolean m_escapeUnknownChars;

    /** Indicates if XML entities are to be encoded in the generated output (not in CDATA elements). */
    private boolean m_escapeXml;

    /** The indentation level. */
    private int m_indentLevel;

    /** Indicates if a CDATA node is still open. */
    private boolean m_isCdata;

    /** The last element name written to the output. */
    private String m_lastElementName;

    /** Indicates if a CDATA node needs to be opened. */
    private boolean m_openCdata;

    /** Indicates if an element tag is still open. */
    private boolean m_openElement;

    /** The Writer to write the output to. */
    private final Writer m_writer;

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * using a new {@link StringWriter} and the OpenCms default encoding.
     */
    public CmsXmlSaxWriter() {

        this(new StringWriter(), OpenCms.getSystemInfo().getDefaultEncoding());
    }

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * using a new {@link StringWriter} and the given encoding.
     *
     * @param encoding the encoding for the XML file
     */
    public CmsXmlSaxWriter(String encoding) {

        this(new StringWriter(), encoding);
    }

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * and writes them to the given Writer using the OpenCms default encoding.
     *
     * @param writer the Writer to write the output to
     */
    public CmsXmlSaxWriter(Writer writer) {

        this(writer, OpenCms.getSystemInfo().getDefaultEncoding());
    }

    /**
     * A SAX event handler that generates XML / HTML Strings from the events caught and writes them
     * to the given Writer.
     *
     * XML escaping is enabled and escaping of unknown characters is disabled by default.
     *
     * @param writer the Writer to write the output to
     * @param encoding the encoding for the XML file
     */
    public CmsXmlSaxWriter(Writer writer, String encoding) {

        m_writer = writer;
        m_encoding = encoding;
        m_indentLevel = 0;
        m_escapeXml = true;
        m_escapeUnknownChars = false;
    }

    /**
     * Writes the given character data to the output.
     *
     * Inside a CDATA section the text is written unchanged (after lazily writing the
     * CDATA start marker); outside of CDATA it is escaped according to the
     * {@link #isEscapeXml()} and {@link #isEscapeUnknownChars()} settings.
     *
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    @Override
    public void characters(char[] buf, int offset, int len) throws SAXException {

        if (len == 0) {
            return;
        }
        if (m_openElement) {
            // the element has content, so the pending start tag must be closed first
            write(">");
            m_openElement = false;
        }
        String text = new String(buf, offset, len);
        if (m_isCdata) {
            if (m_openCdata) {
                // the CDATA start marker is only written once actual content arrives
                write("<![CDATA[");
                m_openCdata = false;
            }
            write(text);
        } else if (m_escapeXml) {
            write(escapeValue(text));
        } else {
            // no escaping should be performed
            write(text);
        }
    }

    /**
     * Comments are not written to the output.
     *
     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
     */
    @Override
    public void comment(char[] ch, int start, int length) {

        // ignore
    }

    /**
     * Closes the current CDATA section, if its start marker has been written.
     *
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    @Override
    public void endCDATA() throws SAXException {

        if (!m_openCdata) {
            // the start marker was written by characters(), so the section must be closed
            write("]]>");
        }
        m_openCdata = false;
        m_isCdata = false;
    }

    /**
     * Closes a still open element tag, writes a final line break and flushes the Writer.
     *
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    @Override
    public void endDocument() throws SAXException {

        try {
            if (m_openElement) {
                write("/>");
                m_openElement = false;
            }
            writeNewLine();
            m_writer.flush();
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#endDTD()
     */
    @Override
    public void endDTD() {

        // NOOP
    }

    /**
     * Writes the end of an element, using the short form {@code />} if the
     * element had no content.
     *
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qualifiedName) throws SAXException {

        String elementName = resolveName(localName, qualifiedName);
        if (m_openElement) {
            // element without content
            write("/>");
        } else {
            if (!elementName.equals(m_lastElementName)) {
                // closing a different element than the one opened last: put the end tag on its own line
                writeNewLine();
            }
            write("</");
            write(elementName);
            write(">");
        }
        m_openElement = false;
        m_indentLevel--;
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
     */
    @Override
    public void endEntity(String name) {

        // NOOP
    }

    /**
     * Returns the encoding this XML Sax writer was initialized with.
     *
     * @return the encoding this XML Sax writer was initialized with
     */
    public String getEncoding() {

        return m_encoding;
    }

    /**
     * Returns the Writer where the XML is written to.
     *
     * @return the Writer where the XML is written to
     */
    public Writer getWriter() {

        return m_writer;
    }

    /**
     * Returns {@code true} if characters that are not part of the selected encoding
     * are to be replaced with the HTML {@code &#123;} entity representation
     * in the generated output (not in CDATA elements).
     *
     * @return {@code true} if characters that are not part of the selected encoding
     *      are to be replaced with the HTML entity representation
     */
    public boolean isEscapeUnknownChars() {

        return m_escapeUnknownChars;
    }

    /**
     * Returns {@code true} if XML entities are to be encoded in the generated output (not in CDATA elements).
     *
     * @return {@code true} if XML entities are to be encoded in the generated output (not in CDATA elements)
     */
    public boolean isEscapeXml() {

        return m_escapeXml;
    }

    /**
     * Sets the encoding to use for the generated output.
     *
     * @param value the encoding to use for the generated output
     */
    public void setEncoding(String value) {

        m_encoding = value;
    }

    /**
     * If set to {@code true}, then characters that are not part of the selected encoding
     * are to be replaced with the XML {@code &#123;} entity representation
     * in the generated output (not in CDATA elements).
     *
     * This setting only has an effect if XML escaping is enabled, see {@link #setEscapeXml(boolean)}.
     *
     * @param value indicates to escape unknown characters with XML entities or not
     */
    public void setEscapeUnknownChars(boolean value) {

        m_escapeUnknownChars = value;
    }

    /**
     * If set to {@code true}, then
     * XML entities are to be encoded in the generated output (not in CDATA elements).
     *
     * @param value indicates to escape characters with XML entities or not
     */
    public void setEscapeXml(boolean value) {

        m_escapeXml = value;
    }

    /**
     * Marks the start of a CDATA section; the start marker itself is written
     * with the first character data received.
     *
     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
     */
    @Override
    public void startCDATA() {

        m_openCdata = true;
        m_isCdata = true;
    }

    /**
     * Writes the XML declaration, including the encoding if one is set.
     *
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    @Override
    public void startDocument() throws SAXException {

        write("<?xml version=\"1.0\"");
        if (m_encoding != null) {
            write(" encoding=\"");
            write(m_encoding);
            write("\"");
        }
        write("?>");
        writeNewLine();
    }

    /**
     * Writes the document type declaration.
     *
     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void startDTD(String name, String publicId, String systemId) throws SAXException {

        write("<!DOCTYPE ");
        write(name);
        if (publicId != null) {
            write(" PUBLIC \"");
            write(publicId);
            write("\"");
            if (systemId != null) {
                // after a public identifier the system identifier follows without the SYSTEM keyword
                write(" \"");
                write(systemId);
                write("\"");
            }
        } else if (systemId != null) {
            write(" SYSTEM \"");
            write(systemId);
            write("\"");
        }
        write(">");
        writeNewLine();
    }

    /**
     * Writes the start tag of an element including its attributes.
     *
     * The tag is left open (no closing {@code >}) until the next event shows
     * whether the element has any content.
     *
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
    throws SAXException {

        if (m_openElement) {
            // the parent element has a child, so its pending start tag must be closed first
            write(">");
            m_openElement = false;
        }
        // increase indent and write linebreak
        m_indentLevel++;
        writeNewLine();
        // get element name and write entry
        m_lastElementName = resolveName(localName, qualifiedName);
        write("<");
        write(m_lastElementName);
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                write(" ");
                write(resolveName(attributes.getLocalName(i), attributes.getQName(i)));
                write("=\"");
                String value = attributes.getValue(i);
                if (m_escapeXml) {
                    value = escapeValue(value);
                }
                write(value);
                write("\"");
            }
        }
        m_openElement = true;
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
     */
    @Override
    public void startEntity(String name) {

        // ignore
    }

    /**
     * Escapes the given text for the generated output.
     *
     * XML entities are always escaped ({@code <} becomes {@code &lt;}); if escaping of
     * unknown characters is enabled, the result is additionally adjusted to the selected encoding.
     *
     * @param value the text to escape
     * @return the escaped text
     */
    private String escapeValue(String value) {

        String escaped = CmsEncoder.escapeXml(value, true);
        if (m_escapeUnknownChars) {
            // escape all chars that can not be displayed in the selected encoding (using '&#123;' entities)
            escaped = CmsEncoder.adjustHtmlEncoding(escaped, getEncoding());
        }
        return escaped;
    }

    /**
     * Resolves the local vs. the qualified name.
     *
     * If the local name is {@code null} or the empty String "", the qualified name is used.
     *
     * @param localName the local name
     * @param qualifiedName the qualified XML 1.0 name
     * @return the resolved name to use
     */
    private String resolveName(String localName, String qualifiedName) {

        if ((localName == null) || (localName.length() == 0)) {
            return qualifiedName;
        } else {
            return localName;
        }
    }

    /**
     * Writes a String to the output stream.
     *
     * @param s the String to write
     * @throws SAXException in case of I/O errors
     */
    private void write(String s) throws SAXException {

        try {
            m_writer.write(s);
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }

    /**
     * Writes a linebreak to the output stream, also handles the indentation.
     *
     * One indentation String less than the current indentation level is written,
     * so that first level elements are not indented. The Writer is flushed afterwards.
     *
     * @throws SAXException in case of I/O errors
     */
    private void writeNewLine() throws SAXException {

        try {
            // write new line
            m_writer.write("\r\n");
            // write indentation
            for (int i = 1; i < m_indentLevel; i++) {
                m_writer.write(INDENT_STR);
            }
            // flush the stream
            m_writer.flush();
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }
}