All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.vault.util.xml.serialize.BaseMarkupSerializer Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Sep 14, 2000:
//  Fixed comments to preserve whitespaces and add a line break
//  when indenting. Reported by Gervase Markham 
// Sep 14, 2000:
//  Fixed serializer to report IO exception directly, instead of at
//  the end of document processing.
//  Reported by Patrick Higgins 
// Sep 13, 2000:
//   CR in character data will print as &#0D;
// Aug 25, 2000:
//   Fixed processing instruction printing inside element content
//   to not escape content. Reported by Mikael Staldal
//   
// Aug 25, 2000:
//   Added ability to omit comments.
//   Contributed by Anupam Bagchi 
// Aug 26, 2000:
//   Fixed bug in newline handling when preserving spaces.
//   Contributed by Mike Dusseault 
// Aug 29, 2000:
//   Fixed state.unescaped not being set to false when
//   entering element state.
//   Reported by Lowell Vaughn 


package org.apache.jackrabbit.vault.util.xml.serialize;


import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Map;
import java.util.TreeMap;
import java.util.Vector;

import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMErrorImpl;
import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMLocatorImpl;
import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMMessageFormatter;
import org.apache.jackrabbit.vault.util.xml.xerces.util.XMLChar;
import org.w3c.dom.DOMError;
import org.w3c.dom.DOMErrorHandler;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.Notation;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.DocumentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DeclHandler;
import org.xml.sax.ext.LexicalHandler;

/**
 * Base class for a serializer supporting both DOM and SAX pretty
 * serializing of XML/HTML/XHTML documents. Derived classes perform
 * the method-specific serializing, this class provides the common
 * serializing mechanisms.
 * 

 * <p>
 * The serializer must be initialized with the proper writer and
 * output format before it can be used by calling {@link #setOutputCharStream}
 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
 * for the output format.

 * <p>
 * The serializer can be reused any number of times, but cannot
 * be used concurrently by two threads.

 * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to {@code UTF-8}). If a writer is
 * used, make sure the writer uses the same encoding (if applicable)
 * as specified in the output format.

 * <p>
 * The serializer supports both DOM and SAX. DOM serializing is done
 * by calling {@link #serialize(Document)} and SAX serializing is done by firing
 * SAX events and using the serializer as a document handler.
 * This also applies to derived classes.

 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).

 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at beginning of line will be stripped.

* When indenting, the serializer is capable of detecting seemingly * element content, and serializing these elements indented on separate * lines. An element is serialized indented when it is the first or * last child of an element, or immediate following or preceding * another element. * * @author Assaf Arkin * @author Rahul Srivastava * @author Elena Litani, IBM * @see Serializer * @see LSSerializer */ public abstract class BaseMarkupSerializer implements ContentHandler, DocumentHandler, LexicalHandler, DTDHandler, DeclHandler, DOMSerializer, Serializer { // DOM L3 implementation protected short features = 0xFFFFFFFF; protected DOMErrorHandler fDOMErrorHandler; protected final DOMErrorImpl fDOMError = new DOMErrorImpl(); //protected LSSerializerFilter fDOMFilter; protected EncodingInfo _encodingInfo; /** * Holds array of all element states that have been entered. * The array is automatically resized. When leaving an element, * it's state is not removed but reused when later returning * to the same nesting level. */ private ElementState[] _elementStates; /** * The index of the next state to place in the array, * or one plus the index of the current state. When zero, * we are in no state. */ private int _elementStateCount; /** * Vector holding comments and PIs that come before the root * element (even after it), see {@link #serializePreRoot}. */ private Vector _preRoot; /** * If the document has been started (header serialized), this * flag is set to true so it's not started twice. */ protected boolean _started; /** * True if the serializer has been prepared. This flag is set * to false when the serializer is reset prior to using it, * and to true after it has been prepared for usage. */ private boolean _prepared; /** * Association between namespace URIs (keys) and prefixes (values). * Accumulated here prior to starting an element and placing this * list in the element state. */ protected Map _prefixes; /** * The system identifier of the document type, if known. 
*/ protected String _docTypePublicId; /** * The system identifier of the document type, if known. */ protected String _docTypeSystemId; /** * The output format associated with this serializer. This will never * be a null reference. If no format was passed to the constructor, * the default one for this document type will be used. The format * object is never changed by the serializer. */ protected OutputFormat _format; /** * The printer used for printing text parts. */ protected Printer _printer; /** * True if indenting printer. */ protected boolean _indenting; /** * Temporary buffer to store character data */ protected final StringBuffer fStrBuffer = new StringBuffer(40); /** * The underlying writer. */ private Writer _writer; /** * The output stream. */ private OutputStream _output; /** * Current node that is being processed */ protected Node fCurrentNode = null; //--------------------------------// // Constructor and initialization // //--------------------------------// /** * Protected constructor can only be used by derived class. 
* Must initialize the serializer before serializing any document, * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} * first */ protected BaseMarkupSerializer(OutputFormat format) { int i; _elementStates = new ElementState[10]; for (i = 0; i < _elementStates.length; ++i) _elementStates[i] = new ElementState(); _format = format; } public DocumentHandler asDocumentHandler() throws IOException { prepare(); return this; } public ContentHandler asContentHandler() throws IOException { prepare(); return this; } public DOMSerializer asDOMSerializer() throws IOException { prepare(); return this; } public void setOutputByteStream(OutputStream output) { if (output == null) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"output"}); throw new NullPointerException(msg); } _output = output; _writer = null; reset(); } public void setOutputCharStream(Writer writer) { if (writer == null) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"writer"}); throw new NullPointerException(msg); } _writer = writer; _output = null; reset(); } public void setOutputFormat(OutputFormat format) { if (format == null) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ArgumentIsNull", new Object[]{"format"}); throw new NullPointerException(msg); } _format = format; reset(); } public boolean reset() { if (_elementStateCount > 1) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "ResetInMiddle", null); throw new IllegalStateException(msg); } _prepared = false; fCurrentNode = null; fStrBuffer.setLength(0); return true; } protected void prepare() throws IOException { if (_prepared) return; if (_writer == null && _output == null) { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "NoWriterSupplied", null); throw new IOException(msg); 
} // If the output stream has been set, use it to construct // the writer. It is possible that the serializer has been // reused with the same output stream and different encoding. _encodingInfo = _format.getEncodingInfo(); if (_output != null) { _writer = _encodingInfo.getWriter(_output); } if (_format.getIndenting()) { _indenting = true; _printer = new IndentPrinter(_writer, _format); } else { _indenting = false; _printer = new Printer(_writer, _format); } ElementState state; _elementStateCount = 0; state = _elementStates[0]; state.namespaceURI = null; state.localName = null; state.rawName = null; state.preserveSpace = _format.getPreserveSpace(); state.empty = true; state.afterElement = false; state.afterComment = false; state.doCData = state.inCData = false; state.prefixes = null; _docTypePublicId = _format.getDoctypePublic(); _docTypeSystemId = _format.getDoctypeSystem(); _started = false; _prepared = true; } //----------------------------------// // DOM document serializing methods // //----------------------------------// /** * Serializes the DOM element using the previously specified * writer and output format. Throws an exception only if * an I/O exception occured while serializing. * * @param elem The element to serialize * @throws IOException An I/O exception occured while * serializing */ public void serialize(Element elem) throws IOException { reset(); prepare(); serializeNode(elem); _printer.flush(); if (_printer.getException() != null) throw _printer.getException(); } /** * Serializes the DOM document fragmnt using the previously specified * writer and output format. Throws an exception only if * an I/O exception occured while serializing. 
* * @param frag The element to serialize * @throws IOException An I/O exception occured while * serializing */ public void serialize(DocumentFragment frag) throws IOException { reset(); prepare(); serializeNode(frag); _printer.flush(); if (_printer.getException() != null) throw _printer.getException(); } /** * Serializes the DOM document using the previously specified * writer and output format. Throws an exception only if * an I/O exception occured while serializing. * * @param doc The document to serialize * @throws IOException An I/O exception occured while * serializing */ public void serialize(Document doc) throws IOException { reset(); prepare(); serializeNode(doc); serializePreRoot(); _printer.flush(); if (_printer.getException() != null) throw _printer.getException(); } //------------------------------------------// // SAX document handler serializing methods // //------------------------------------------// public void startDocument() throws SAXException { try { prepare(); } catch (IOException except) { throw new SAXException(except.toString()); } // Nothing to do here. All the magic happens in startDocument(String) } public void characters(char[] chars, int start, int length) throws SAXException { ElementState state; try { state = content(); // Check if text should be print as CDATA section or unescaped // based on elements listed in the output format (the element // state) or whether we are inside a CDATA section or entity. if (state.inCData || state.doCData) { int saveIndent; // Print a CDATA section. The text is not escaped, but ']]>' // appearing in the code must be identified and dealt with. // The contents of a text node is considered space preserving. 
if (!state.inCData) { _printer.printText("') { _printer.printText("]]]]>"); index += 2; continue; } if (!XMLChar.isValid(ch)) { // check if it is surrogate if (++index < length) { surrogates(ch, chars[index]); } else { fatalError("The character '" + (char) ch + "' is an invalid XML character"); } continue; } else { if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7) || ch == '\n' || ch == '\r' || ch == '\t') { _printer.printText((char) ch); } else { // The character is not printable -- split CDATA section _printer.printText("]]>&#x"); _printer.printText(Integer.toHexString(ch)); _printer.printText("; 0; ++i) _printer.printText(chars[i]); } } catch (IOException except) { throw new SAXException(except); } } public final void processingInstruction(String target, String code) throws SAXException { try { processingInstructionIO(target, code); } catch (IOException except) { throw new SAXException(except); } } public void processingInstructionIO(String target, String code) throws IOException { int index; ElementState state; state = content(); // Create the processing instruction textual representation. // Make sure we don't have '?>' inside either target or code. index = target.indexOf("?>"); if (index >= 0) fStrBuffer.append(""); if (index >= 0) fStrBuffer.append(code.substring(0, index)); else fStrBuffer.append(code); } fStrBuffer.append("?>"); // If before the root element (or after it), do not print // the PI directly but place it in the pre-root vector. 
if (isDocumentState()) { if (_preRoot == null) _preRoot = new Vector(); _preRoot.addElement(fStrBuffer.toString()); } else { _printer.indent(); printText(fStrBuffer.toString(), true, true); _printer.unindent(); if (_indenting) state.afterElement = true; } fStrBuffer.setLength(0); } public void comment(char[] chars, int start, int length) throws SAXException { try { comment(new String(chars, start, length)); } catch (IOException except) { throw new SAXException(except); } } public void comment(String text) throws IOException { int index; ElementState state; if (_format.getOmitComments()) return; state = content(); // Create the processing comment textual representation. // Make sure we don't have '-->' inside the comment. index = text.indexOf("-->"); if (index >= 0) fStrBuffer.append(""); else fStrBuffer.append(""); // If before the root element (or after it), do not print // the comment directly but place it in the pre-root vector. if (isDocumentState()) { if (_preRoot == null) _preRoot = new Vector(); _preRoot.addElement(fStrBuffer.toString()); } else { // Indent this element on a new line if the first // content of the parent element or immediately // following an element. 
if (_indenting && !state.preserveSpace) _printer.breakLine(); _printer.indent(); printText(fStrBuffer.toString(), true, true); _printer.unindent(); if (_indenting) state.afterElement = true; } fStrBuffer.setLength(0); state.afterComment = true; state.afterElement = false; } public void startCDATA() { ElementState state; state = getElementState(); state.doCData = true; } public void endCDATA() { ElementState state; state = getElementState(); state.doCData = false; } public void startNonEscaping() { ElementState state; state = getElementState(); state.unescaped = true; } public void endNonEscaping() { ElementState state; state = getElementState(); state.unescaped = false; } public void startPreserving() { ElementState state; state = getElementState(); state.preserveSpace = true; } public void endPreserving() { ElementState state; state = getElementState(); state.preserveSpace = false; } /** * Called at the end of the document to wrap it up. * Will flush the output stream and throw an exception * if any I/O error occured while serializing. * * @throws SAXException An I/O exception occured during * serializing */ public void endDocument() throws SAXException { try { // Print all the elements accumulated outside of // the root element. serializePreRoot(); // Flush the output, this is necessary for fStrBuffered output. _printer.flush(); } catch (IOException except) { throw new SAXException(except); } } public void startEntity(String name) { // ??? } public void endEntity(String name) { // ??? 
} public void setDocumentLocator(Locator locator) { // Nothing to do } //-----------------------------------------// // SAX content handler serializing methods // //-----------------------------------------// public void skippedEntity(String name) throws SAXException { try { endCDATA(); content(); _printer.printText('&'); _printer.printText(name); _printer.printText(';'); } catch (IOException except) { throw new SAXException(except); } } public void startPrefixMapping(String prefix, String uri) throws SAXException { if (_prefixes == null) _prefixes = new TreeMap(); _prefixes.put(uri, prefix == null ? "" : prefix); } public void endPrefixMapping(String prefix) throws SAXException { } //------------------------------------------// // SAX DTD/Decl handler serializing methods // //------------------------------------------// public final void startDTD(String name, String publicId, String systemId) throws SAXException { try { _printer.enterDTD(); _docTypePublicId = publicId; _docTypeSystemId = systemId; } catch (IOException except) { throw new SAXException(except); } } public void endDTD() { // Nothing to do here, all the magic occurs in startDocument(String). 
} public void elementDecl(String name, String model) throws SAXException { try { _printer.enterDTD(); _printer.printText("'); if (_indenting) _printer.breakLine(); } catch (IOException except) { throw new SAXException(except); } } public void attributeDecl(String eName, String aName, String type, String valueDefault, String value) throws SAXException { try { _printer.enterDTD(); _printer.printText("'); if (_indenting) _printer.breakLine(); } catch (IOException except) { throw new SAXException(except); } } public void internalEntityDecl(String name, String value) throws SAXException { try { _printer.enterDTD(); _printer.printText(""); if (_indenting) _printer.breakLine(); } catch (IOException except) { throw new SAXException(except); } } public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { try { _printer.enterDTD(); unparsedEntityDecl(name, publicId, systemId, null); } catch (IOException except) { throw new SAXException(except); } } public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { try { _printer.enterDTD(); if (publicId == null) { _printer.printText("'); if (_indenting) _printer.breakLine(); } catch (IOException except) { throw new SAXException(except); } } public void notationDecl(String name, String publicId, String systemId) throws SAXException { try { _printer.enterDTD(); if (publicId != null) { _printer.printText("'); if (_indenting) _printer.breakLine(); } catch (IOException except) { throw new SAXException(except); } } //------------------------------------------// // Generic node serializing methods methods // //------------------------------------------// /** * Serialize the DOM node. This method is shared across XML, HTML and XHTML * serializers and the differences are masked out in a separate {@link * #serializeElement}. 
* * @param node The node to serialize * @throws IOException An I/O exception occured while * serializing * @see #serializeElement */ protected void serializeNode(Node node) throws IOException { fCurrentNode = node; // Based on the node type call the suitable SAX handler. // Only comments entities and documents which are not // handled by SAX are serialized directly. switch (node.getNodeType()) { case Node.TEXT_NODE: { String text; text = node.getNodeValue(); if (text != null) { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case NodeFilter.FILTER_REJECT: case NodeFilter.FILTER_SKIP: { break; } default: { characters(text); } } } else */ if (!_indenting || getElementState().preserveSpace || (text.replace('\n', ' ').trim().length() != 0)) characters(text); } break; } case Node.CDATA_SECTION_NODE: { String text = node.getNodeValue(); if ((features & (0x1 << 3) /*DOMSerializerImpl.CDATA*/) != 0) { if (text != null) { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_CDATA_SECTION) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case NodeFilter.FILTER_REJECT: case NodeFilter.FILTER_SKIP: { // skip the CDATA node return; } default: { //fall through.. 
} } } */ startCDATA(); characters(text); endCDATA(); } } else { // transform into a text node characters(text); } break; } case Node.COMMENT_NODE: { String text; if (!_format.getOmitComments()) { text = node.getNodeValue(); if (text != null) { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case NodeFilter.FILTER_REJECT: case NodeFilter.FILTER_SKIP: { // skip the comment node return; } default: { // fall through } } } */ comment(text); } } break; } case Node.ENTITY_REFERENCE_NODE: { Node child; endCDATA(); content(); if (((features & (0x1 << 2)/*DOMSerializerImpl.ENTITIES*/) != 0) || (node.getFirstChild() == null)) { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case NodeFilter.FILTER_REJECT: { return; // remove the node } case NodeFilter.FILTER_SKIP: { child = node.getFirstChild(); while (child != null) { serializeNode(child); child = child.getNextSibling(); } return; } default: { // fall through } } } */ checkUnboundNamespacePrefixedNode(node); _printer.printText("&"); _printer.printText(node.getNodeName()); _printer.printText(";"); } else { child = node.getFirstChild(); while (child != null) { serializeNode(child); child = child.getNextSibling(); } } break; } case Node.PROCESSING_INSTRUCTION_NODE: { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case NodeFilter.FILTER_REJECT: case NodeFilter.FILTER_SKIP: { return; // skip this node } default: { // fall through } } } */ processingInstructionIO(node.getNodeName(), node.getNodeValue()); break; } case Node.ELEMENT_NODE: { /* if (fDOMFilter != null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT) != 0) { short code = fDOMFilter.acceptNode(node); switch (code) { case 
NodeFilter.FILTER_REJECT: { return; } case NodeFilter.FILTER_SKIP: { Node child = node.getFirstChild(); while (child != null) { serializeNode(child); child = child.getNextSibling(); } return; // skip this node } default: { // fall through } } } */ serializeElement((Element) node); break; } case Node.DOCUMENT_NODE: { DocumentType docType; DOMImplementation domImpl; NamedNodeMap map; Entity entity; Notation notation; int i; // If there is a document type, use the SAX events to // serialize it. docType = ((Document) node).getDoctype(); if (docType != null) { // DOM Level 2 (or higher) domImpl = ((Document) node).getImplementation(); try { String internal; _printer.enterDTD(); _docTypePublicId = docType.getPublicId(); _docTypeSystemId = docType.getSystemId(); internal = docType.getInternalSubset(); if (internal != null && internal.length() > 0) _printer.printText(internal); endDTD(); } // DOM Level 1 -- does implementation have methods? catch (NoSuchMethodError nsme) { Class docTypeClass = docType.getClass(); String docTypePublicId = null; String docTypeSystemId = null; try { java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId"); if (getPublicId.getReturnType().equals(String.class)) { docTypePublicId = (String) getPublicId.invoke(docType); } } catch (Exception e) { // ignore } try { java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId"); if (getSystemId.getReturnType().equals(String.class)) { docTypeSystemId = (String) getSystemId.invoke(docType); } } catch (Exception e) { // ignore } _printer.enterDTD(); _docTypePublicId = docTypePublicId; _docTypeSystemId = docTypeSystemId; endDTD(); } } // !! Fall through } case Node.DOCUMENT_FRAGMENT_NODE: { Node child; // By definition this will happen if the node is a document, // document fragment, etc. Just serialize its contents. It will // work well for other nodes that we do not know how to serialize. 
child = node.getFirstChild(); while (child != null) { serializeNode(child); child = child.getNextSibling(); } break; } default: break; } } /** * Must be called by a method about to print any type of content. * If the element was just opened, the opening tag is closed and * will be matched to a closing tag. Returns the current element * state with empty and afterElement set to false. * * @return The current element state * @throws IOException An I/O exception occured while * serializing */ protected ElementState content() throws IOException { ElementState state; state = getElementState(); if (!isDocumentState()) { // Need to close CData section first if (state.inCData && !state.doCData) { _printer.printText("]]>"); state.inCData = false; } // If this is the first content in the element, // change the state to not-empty and close the // opening element tag. if (state.empty) { _printer.printText('>'); state.empty = false; } // Except for one content type, all of them // are not last element. That one content // type will take care of itself. state.afterElement = false; // Except for one content type, all of them // are not last comment. That one content // type will take care of itself. state.afterComment = false; } return state; } /** * Called to print the text contents in the prevailing element format. * Since this method is capable of printing text as CDATA, it is used * for that purpose as well. White space handling is determined by the * current element state. In addition, the output format can dictate * whether the text is printed as CDATA or unescaped. * * @param text The text to print * @throws IOException An I/O exception occured while * serializing */ protected void characters(String text) throws IOException { ElementState state; state = content(); // Check if text should be print as CDATA section or unescaped // based on elements listed in the output format (the element // state) or whether we are inside a CDATA section or entity. 
if (state.inCData || state.doCData) { int saveIndent; // Print a CDATA section. The text is not escaped, but ']]>' // appearing in the code must be identified and dealt with. // The contents of a text node is considered space preserving. if (!state.inCData) { _printer.printText("'&' * will return "&amp;". * * @param ch Character value * @return Character entity name, or null */ protected abstract String getEntityRef(int ch); /** * Called to serializee the DOM element. The element is serialized based on * the serializer's method (XML, HTML, XHTML). * * @param elem The element to serialize * @throws IOException An I/O exception occured while * serializing */ protected abstract void serializeElement(Element elem) throws IOException; /** * Comments and PIs cannot be serialized before the root element, * because the root element serializes the document type, which * generally comes first. Instead such PIs and comments are * accumulated inside a vector and serialized by calling this * method. Will be called when the root element is serialized * and when the document finished serializing. 
* * @throws IOException An I/O exception occured while * serializing */ protected void serializePreRoot() throws IOException { int i; if (_preRoot != null) { for (i = 0; i < _preRoot.size(); ++i) { printText((String) _preRoot.elementAt(i), true, true); if (_indenting) _printer.breakLine(); } _preRoot.removeAllElements(); } } //---------------------------------------------// // Text pretty printing and formatting methods // //---------------------------------------------// protected void printCDATAText(String text) throws IOException { int length = text.length(); char ch; for (int index = 0; index < length; ++index) { ch = text.charAt(index); if (ch == ']' && index + 2 < length && text.charAt(index + 1) == ']' && text.charAt(index + 2) == '>') { // check for ']]>' if (fDOMErrorHandler != null) { // REVISIT: this means that if DOM Error handler is not registered we don't report any // fatal errors and might serialize not wellformed document /* if ((features & DOMSerializerImpl.SPLITCDATA) == 0 && (features & DOMSerializerImpl.WELLFORMED) == 0) { // issue fatal error String msg = DOMMessageFormatter.formatMessage( DOMMessageFormatter.SERIALIZER_DOMAIN, "EndingCDATA", null); modifyDOMError( msg, DOMError.SEVERITY_FATAL_ERROR, fCurrentNode); boolean continueProcess = fDOMErrorHandler.handleError(fDOMError); if (!continueProcess) { throw new IOException(); } } else*/ { // issue warning String msg = DOMMessageFormatter.formatMessage( DOMMessageFormatter.SERIALIZER_DOMAIN, "SplittingCDATA", null); modifyDOMError( msg, DOMError.SEVERITY_WARNING, fCurrentNode); fDOMErrorHandler.handleError(fDOMError); } } // split CDATA section _printer.printText("]]]]>"); index += 2; continue; } if (!XMLChar.isValid(ch)) { // check if it is surrogate if (++index < length) { surrogates(ch, text.charAt(index)); } else { fatalError("The character '" + (char) ch + "' is an invalid XML character"); } continue; } else { if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7) || ch 
== '\n' || ch == '\r' || ch == '\t') { _printer.printText((char) ch); } else { // The character is not printable -- split CDATA section _printer.printText("]]>&#x"); _printer.printText(Integer.toHexString(ch)); _printer.printText(";&#x"); _printer.printText(Integer.toHexString(supplemental)); _printer.printText("; 0) { ch = chars[start]; ++start; if (ch == '\n' || ch == '\r' || unescaped) _printer.printText(ch); else printEscaped(ch); } } else { // Not preserving spaces: print one part at a time, and // use spaces between parts to break them into different // lines. Spaces at beginning of line will be stripped // by printing mechanism. Line terminator is treated // no different than other text part. while (length-- > 0) { ch = chars[start]; ++start; if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r') _printer.printSpace(); else if (unescaped) _printer.printText(ch); else printEscaped(ch); } } } protected void printText(String text, boolean preserveSpace, boolean unescaped) throws IOException { int index; char ch; if (preserveSpace) { // Preserving spaces: the text must print exactly as it is, // without breaking when spaces appear in the text and without // consolidating spaces. If a line terminator is used, a line // break will occur. for (index = 0; index < text.length(); ++index) { ch = text.charAt(index); if (ch == '\n' || ch == '\r' || unescaped) _printer.printText(ch); else printEscaped(ch); } } else { // Not preserving spaces: print one part at a time, and // use spaces between parts to break them into different // lines. Spaces at beginning of line will be stripped // by printing mechanism. Line terminator is treated // no different than other text part. 
for (index = 0; index < text.length(); ++index) { ch = text.charAt(index); if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r') _printer.printSpace(); else if (unescaped) _printer.printText(ch); else printEscaped(ch); } } } /** * Print a document type public or system identifier URL. * Encapsulates the URL in double quotes, escapes non-printing * characters and print it equivalent to {@link #printText}. * * @param url The document type url to print */ protected void printDoctypeURL(String url) throws IOException { int i; _printer.printText('"'); for (i = 0; i < url.length(); ++i) { if (url.charAt(i) == '"' || url.charAt(i) < 0x20 || url.charAt(i) > 0x7F) { _printer.printText('%'); _printer.printText(Integer.toHexString(url.charAt(i))); } else _printer.printText(url.charAt(i)); } _printer.printText('"'); } protected void printEscaped(int ch) throws IOException { String charRef; // If there is a suitable entity reference for this // character, print it. The list of available entity // references is almost but not identical between // XML and HTML. charRef = getEntityRef(ch); if (charRef != null) { _printer.printText('&'); _printer.printText(charRef); _printer.printText(';'); } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7) || ch == '\n' || ch == '\r' || ch == '\t') { // Non printables are below ASCII space but not tab or line // terminator, ASCII delete, or above a certain Unicode threshold. if (ch < 0x10000) { _printer.printText((char) ch); } else { _printer.printText((char) (((ch - 0x10000) >> 10) + 0xd800)); _printer.printText((char) (((ch - 0x10000) & 0x3ff) + 0xdc00)); } } else { printHex(ch); } } /** * Escapes chars */ final void printHex(int ch) throws IOException { _printer.printText("&#x"); _printer.printText(Integer.toHexString(ch)); _printer.printText(';'); } /** * Escapes a string so it may be printed as text content or attribute * value. Non printable characters are escaped using character references. 
* Where the format specifies a deault entity reference, that reference * is used (e.g. &lt;). * * @param source The string to escape */ protected void printEscaped(String source) throws IOException { for (int i = 0; i < source.length(); ++i) { int ch = source.charAt(i); if ((ch & 0xfc00) == 0xd800 && i + 1 < source.length()) { int lowch = source.charAt(i + 1); if ((lowch & 0xfc00) == 0xdc00) { ch = 0x10000 + ((ch - 0xd800) << 10) + lowch - 0xdc00; i++; } } printEscaped(ch); } } //--------------------------------// // Element state handling methods // //--------------------------------// /** * Return the state of the current element. * * @return Current element state */ protected ElementState getElementState() { return _elementStates[_elementStateCount]; } /** * Enter a new element state for the specified element. * Tag name and space preserving is specified, element * state is initially empty. * * @return Current element state, or null */ protected ElementState enterElementState(String namespaceURI, String localName, String rawName, boolean preserveSpace) { ElementState state; if (_elementStateCount + 1 == _elementStates.length) { ElementState[] newStates; // Need to create a larger array of states. This does not happen // often, unless the document is really deep. 
newStates = new ElementState[_elementStates.length + 10]; for (int i = 0; i < _elementStates.length; ++i) newStates[i] = _elementStates[i]; for (int i = _elementStates.length; i < newStates.length; ++i) newStates[i] = new ElementState(); _elementStates = newStates; } ++_elementStateCount; state = _elementStates[_elementStateCount]; state.namespaceURI = namespaceURI; state.localName = localName; state.rawName = rawName; state.preserveSpace = preserveSpace; state.empty = true; state.afterElement = false; state.afterComment = false; state.doCData = state.inCData = false; state.unescaped = false; state.prefixes = _prefixes; _prefixes = null; return state; } /** * Leave the current element state and return to the * state of the parent element. If this was the root * element, return to the state of the document. * * @return Previous element state */ protected ElementState leaveElementState() { if (_elementStateCount > 0) { /*Corrected by David Blondeau ([email protected])*/ _prefixes = null; //_prefixes = _elementStates[ _elementStateCount ].prefixes; --_elementStateCount; return _elementStates[_elementStateCount]; } else { String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null); throw new IllegalStateException(msg); } } /** * Returns true if in the state of the document. * Returns true before entering any element and after * leaving the root element. * * @return True if in the state of the document */ protected boolean isDocumentState() { return _elementStateCount == 0; } /** * Returns the namespace prefix for the specified URI. * If the URI has been mapped to a prefix, returns the * prefix, otherwise returns null. 
*
     * @param namespaceURI The namespace URI
     * @return The namespace prefix if known, or null
     */
    protected String getPrefix(String namespaceURI) {
        // Prefixes declared but not yet attached to an element take precedence.
        if (_prefixes != null) {
            String pending = (String) _prefixes.get(namespaceURI);
            if (pending != null) {
                return pending;
            }
        }

        // Search from the innermost element outwards; slot 0 is never consulted.
        for (int depth = _elementStateCount; depth > 0; --depth) {
            if (_elementStates[depth].prefixes == null) {
                continue;
            }
            String mapped = (String) _elementStates[depth].prefixes.get(namespaceURI);
            if (mapped != null) {
                return mapped;
            }
        }
        return null;
    }

    /**
     * Resets the shared DOM error object and fills it with the given details.
     *
     * @param message  The error message
     * @param severity The error severity
     * @param node     The node stored in the error's locator
     * @return The shared, updated DOMError instance
     */
    protected DOMError modifyDOMError(String message, short severity, Node node) {
        fDOMError.reset();
        fDOMError.fSeverity = severity;
        fDOMError.fMessage = message;
        fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
        return fDOMError;
    }

    /**
     * Reports a fatal error. When a DOM error handler is registered the
     * error is passed to it; otherwise an IOException is thrown.
     *
     * @param message The error message
     * @throws IOException If no DOM error handler is registered
     */
    protected void fatalError(String message) throws IOException {
        if (fDOMErrorHandler == null) {
            throw new IOException(message);
        }
        modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, fCurrentNode);
        fDOMErrorHandler.handleError(fDOMError);
    }

    /**
     * DOM level 3:
     * Check a node to determine if it contains unbound namespace prefixes.
     * This implementation does nothing.
     *
     * @param node The node to check for unbound namespace prefixes
     */
    protected void checkUnboundNamespacePrefixedNode(Node node) throws IOException {
        // intentionally empty
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy