com.sun.org.apache.xml.internal.serialize.XML11Serializer Maven / Gradle / Ivy
Show all versions of jaxp-ri Show documentation
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common Development
* and Distribution License("CDDL") (collectively, the "License"). You
* may not use this file except in compliance with the License. You can
* obtain a copy of the License at
* https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
* or packager/legal/LICENSE.txt. See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each
* file and include the License file at packager/legal/LICENSE.txt.
*
* GPL Classpath Exception:
* Oracle designates this particular file as subject to the "Classpath"
* exception as provided by Oracle in the GPL Version 2 section of the License
* file that accompanied this code.
*
* Modifications:
* If applicable, add the following below the License Header, with the fields
* enclosed by brackets [] replaced by your own identifying information:
* "Portions Copyright [year] [name of copyright owner]"
*
* Contributor(s):
* If you wish your version of this file to be governed by only the CDDL or
* only the GPL Version 2, indicate your decision by adding "[Contributor]
* elects to include this software in this distribution under the [CDDL or GPL
* Version 2] license." If you don't indicate a single choice of license, a
* recipient has the option to distribute your version of this file under
* either the CDDL, the GPL Version 2 or to extend the choice of license to
* its licensees as provided above. However, if you add GPL Version 2 code
* and therefore, elected the GPL Version 2 license, then the option applies
* only if the new code is made subject to such option by the copyright
* holder.
*
*
* This file incorporates work covered by the following copyright and
* permission notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Sep 14, 2000:
// Fixed problem with namespace handling. Contributed by
// David Blondeau
// Sep 14, 2000:
// Fixed serializer to report IO exception directly, instead of at
// the end of document processing.
// Reported by Patrick Higgins
// Aug 21, 2000:
// Fixed bug in startDocument not calling prepare.
// Reported by Mikael Staldal
// Aug 21, 2000:
// Added ability to omit DOCTYPE declaration.
package com.sun.org.apache.xml.internal.serialize;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
import com.sun.org.apache.xerces.internal.impl.Constants;
import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
import com.sun.org.apache.xerces.internal.util.SymbolTable;
import com.sun.org.apache.xerces.internal.util.XML11Char;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import org.xml.sax.SAXException;
import org.w3c.dom.DOMError;
/**
* Implements an XML serializer supporting both DOM and SAX pretty
* serializing. For usage instructions see {@link Serializer}.
*
* If an output stream is used, the encoding is taken from the
* output format (defaults to UTF-8). If a writer is
* used, make sure the writer uses the same encoding (if applicable)
* as specified in the output format.
*
* The serializer supports both DOM and SAX. SAX serializing is done by firing
* SAX events and using the serializer as a document handler. DOM serializing is done
* by calling {@link #serialize(Document)} or by using DOM Level 3
* {@link org.w3c.dom.ls.LSSerializer} and
* serializing with {@link org.w3c.dom.ls.LSSerializer#write} or
* {@link org.w3c.dom.ls.LSSerializer#writeToString}.
*
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}).
*
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
* @author Assaf Arkin
* @author Rahul Srivastava
* @author Elena Litani IBM
* @version $Revision: 1.6 $ $Date: 2010-11-01 04:40:37 $
* @see Serializer
*/
public class XML11Serializer
extends XMLSerializer {
//
// constants
//
/** Debug switch; declared as a constant {@code false} in this class. */
protected static final boolean DEBUG = false;
//
// data
//
//
// DOM Level 3 implementation: variables initialized in DOMSerializerImpl
//
// NOTE(review): the superclass may declare fields with these same names; if so,
// the declarations below hide them rather than share them -- confirm against
// XMLSerializer before relying on either copy.
/** stores namespaces in scope */
protected NamespaceSupport fNSBinder;
/** stores all namespace bindings on the current element */
protected NamespaceSupport fLocalNSBinder;
/** symbol table for serialization */
protected SymbolTable fSymbolTable;
// is node dom level 1 node?
protected boolean fDOML1 = false;
// counter for new prefix names; starts at 1
protected int fNamespaceCounter = 1;
// presumably the stem combined with fNamespaceCounter to build new prefix names -- TODO confirm
protected final static String PREFIX = "NS";
/**
 * Controls whether namespace fixup should be performed during
 * the serialization. Defaults to {@code false}.
 * NOTE: if this field is set to true the following
 * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
 * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
 */
protected boolean fNamespaces = false;
// NOTE(review): no assignment to or read of this field is visible in this file as shown.
private boolean fPreserveSpace;
/**
 * Creates an XML 1.1 serializer that has no output target yet.
 * <p>
 * Attach a target with {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before serializing anything. The
 * serializer's output format is stamped with version {@code "1.1"}.
 */
public XML11Serializer() {
    // The superclass no-arg constructor is invoked implicitly; once it has
    // run, mark the output format as XML 1.1.
    _format.setVersion("1.1");
}
/**
 * Constructs a new serializer using the specified output format. The
 * serializer cannot be used without calling {@link #setOutputCharStream}
 * or {@link #setOutputByteStream} first.
 * <p>
 * The output format in effect is stamped with version {@code "1.1"}.
 * NOTE(review): presumably that is the very instance passed in, so the
 * caller's object is modified -- confirm against the superclass.
 *
 * @param format The output format to use, null for the default
 */
public XML11Serializer( OutputFormat format ) {
    // Substitute the same default the OutputStream constructor uses, so that
    // the setVersion call below never depends on how the superclass treats a
    // null format.
    super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
    _format.setVersion("1.1");
}
/**
 * Constructs a new serializer that writes to the specified writer
 * using the specified output format. If <code>format</code> is null,
 * a default output format is used.
 * <p>
 * The output format in effect is stamped with version {@code "1.1"}.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( Writer writer, OutputFormat format ) {
    // Enforce the documented "null means default" contract here, using the
    // same default as the OutputStream constructor, instead of relying on the
    // superclass to do so before _format is dereferenced.
    super( writer, format != null ? format : new OutputFormat( Method.XML, null, false ) );
    _format.setVersion("1.1");
}
/**
 * Creates an XML 1.1 serializer bound to a byte stream.
 * <p>
 * When <code>format</code> is null, a fresh
 * {@code OutputFormat(Method.XML, null, false)} is used in its place.
 * The output format in effect is then stamped with version {@code "1.1"}.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( OutputStream output, OutputFormat format ) {
    super( output,
           format == null
               ? new OutputFormat( Method.XML, null, false )
               : format );
    _format.setVersion("1.1");
}
//-----------------------------------------//
// SAX content handler serializing methods //
//-----------------------------------------//
/**
 * SAX callback for a run of character data.
 * <p>
 * As far as the visible code shows, the method fetches the current element
 * state through {@code content()} and, when {@code state.inCData} or
 * {@code state.doCData} is set, starts CDATA-section handling.
 * <p>
 * NOTE(review): everything from the first {@code _printer.printText(} call
 * onward is damaged in this copy of the file. The region is not valid Java
 * and seems to run into the body of at least one other method. Restore it
 * from the upstream source before building.
 *
 * @param chars  buffer holding the characters
 * @param start  index of the first character to handle
 * @param length number of characters to handle
 * @throws SAXException declared by the signature; the throwing code is not
 *         visible in this damaged region
 */
public void characters( char[] chars, int start, int length )
throws SAXException
{
ElementState state;
try {
state = content();
// Check if text should be print as CDATA section or unescaped
// based on elements listed in the output format (the element
// state) or whether we are inside a CDATA section or entity.
if ( state.inCData || state.doCData ) {
int saveIndent;
// Print a CDATA section. The text is not escaped, but ']]>'
// appearing in the code must be identified and dealt with.
// The contents of a text node is considered space preserving.
if ( ! state.inCData ) {
// NOTE(review): the next line has an unterminated string literal, and the
// code after it uses index, end and ch with no visible declaration. Text
// appears to have been lost here (possibly markup-like content such as a
// CDATA opener stripped during extraction) -- compare with upstream.
_printer.printText( "' ) {
_printer.printText("]]]]>");
index +=2;
continue;
}
if (!XML11Char.isXML11Valid(ch)) {
// check if it is surrogate
if (++index < end) {
surrogates(ch, chars[index]);
}
else {
fatalError("The character '"+(char)ch+"' is an invalid XML character");
}
continue;
} else {
if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
_printer.printText((char)ch);
} else {
// The character is not printable -- split CDATA section
_printer.printText("]]>");
_printer.printText(Integer.toHexString(ch));
_printer.printText(";= ' ' && _encodingInfo.isPrintable((char) ch))) {
_printer.printText((char) ch);
} else {
printHex(ch);
}
}
}
/**
 * Writes the given text as CDATA content, one character at a time.
 * <p>
 * For each occurrence of {@code ]]>} in the text, and only when
 * {@code fDOMErrorHandler} is non-null, a DOM error is reported: a fatal
 * "EndingCDATA" error when neither the {@code SPLITCDATA} nor the
 * {@code WELLFORMED} bit is set in {@code features}, otherwise a
 * "SplittingCDATA" warning. If the handler returns {@code false} for the
 * fatal error, an {@link IOException} is thrown. Characters rejected by
 * {@code XML11Char.isXML11Valid} are passed with the following character to
 * {@code surrogates}, or reported through {@code fatalError} when they are
 * the last character.
 * <p>
 * NOTE(review): the tail of this method is damaged in this copy of the file
 * and runs into the body of a different method (one taking a character
 * {@code ch} and ending in {@code printHex(ch)}). Restore it from upstream.
 *
 * @param text the CDATA text to write
 * @throws IOException if the error handler asks to stop; other throwing paths
 *         are not visible here
 */
protected final void printCDATAText(String text) throws IOException {
int length = text.length();
char ch;
for (int index = 0; index < length; ++index) {
ch = text.charAt(index);
if (ch == ']'
&& index + 2 < length
&& text.charAt(index + 1) == ']'
&& text.charAt(index + 2) == '>') { // check for ']]>'
if (fDOMErrorHandler != null){
// REVISIT: this means that if DOM Error handler is not registered we don't report any
// fatal errors and might serialize not wellformed document
if ((features & DOMSerializerImpl.SPLITCDATA) == 0
&& (features & DOMSerializerImpl.WELLFORMED) == 0) {
// issue fatal error
String msg =
DOMMessageFormatter.formatMessage(
DOMMessageFormatter.SERIALIZER_DOMAIN,
"EndingCDATA",
null);
modifyDOMError(
msg,
DOMError.SEVERITY_FATAL_ERROR,
null, fCurrentNode);
boolean continueProcess =
fDOMErrorHandler.handleError(fDOMError);
if (!continueProcess) {
throw new IOException();
}
} else {
// issue warning
String msg =
DOMMessageFormatter.formatMessage(
DOMMessageFormatter.SERIALIZER_DOMAIN,
"SplittingCDATA",
null);
modifyDOMError(
msg,
DOMError.SEVERITY_WARNING,
null, fCurrentNode);
fDOMErrorHandler.handleError(fDOMError);
}
}
// split CDATA section
// NOTE(review): the literal below ends the section but contains no text that
// would open a new one, although the comment says "split"; part of the
// literal may have been lost -- compare with upstream.
_printer.printText("]]]]>");
// skip the remaining two characters of the ']]>' sequence
index += 2;
continue;
}
if (!XML11Char.isXML11Valid(ch)) {
// check if it is surrogate
if (++index < length) {
surrogates(ch, text.charAt(index));
} else {
fatalError(
"The character '"
+ (char) ch
+ "' is an invalid XML character");
}
continue;
} else {
if (_encodingInfo.isPrintable((char) ch)
&& XML11Char.isXML11ValidLiteral(ch)) {
_printer.printText((char) ch);
} else {
// The character is not printable -- split CDATA section
// NOTE(review): the hex digits are written right after "]]>" with no visible
// character-reference prefix, and the line after them has an unterminated
// string literal. From that point on the code belongs to another method whose
// header is missing. In that fragment '>' is printed literally even though
// its comment says it should be escaped. All of this looks like text lost or
// entity-decoded during extraction -- restore from upstream.
_printer.printText("]]>");
_printer.printText(Integer.toHexString(ch));
_printer.printText(";'){
// character sequence "]]>" can't appear in content, therefore
// we should escape '>'
_printer.printText(">");
} else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
_printer.printText((char)ch);
} else {
printHex(ch);
}
}
/**
 * Handles a candidate surrogate pair.
 * <p>
 * When {@code high} is a high surrogate and {@code low} is a low surrogate,
 * the pair is combined with {@code XMLChar.supplemental}. The combined code
 * point is rejected through {@code fatalError} if
 * {@code XML11Char.isXML11Valid} refuses it. A {@code low} that is not a low
 * surrogate is also reported through {@code fatalError}.
 * <p>
 * NOTE(review): the rest of this method is damaged in this copy of the file
 * and runs into the body of a different method that loops over a
 * {@code char[]} using {@code start}, {@code length} and {@code unescaped}.
 * What happens to a valid pair, or to a {@code high} that is not a high
 * surrogate, cannot be determined from here. Restore it from upstream.
 *
 * @param high the first code unit of the candidate pair
 * @param low  the second code unit of the candidate pair
 * @throws IOException declared by the signature
 */
protected final void surrogates(int high, int low) throws IOException{
if (XMLChar.isHighSurrogate(high)) {
if (!XMLChar.isLowSurrogate(low)) {
//Invalid XML
fatalError("The character '"+(char)low+"' is an invalid XML character");
}
else {
int supplemental = XMLChar.supplemental((char)high, (char)low);
if (!XML11Char.isXML11Valid(supplemental)) {
//Invalid XML
// NOTE(review): the int code point is narrowed to char for the message, so
// the character shown is not the offending supplementary character.
fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
}
else {
if (content().inCData ) {
// NOTE(review): as in the other CDATA paths, the hex digits follow "]]>"
// with no visible character-reference prefix, and the line after them has an
// unterminated string literal that runs into a loop from another method.
// Text was probably lost during extraction -- compare with upstream.
_printer.printText("]]>");
_printer.printText(Integer.toHexString(supplemental));
_printer.printText("; 0 ) {
ch = chars[start++];
if (!XML11Char.isXML11Valid(ch)) {
// check if it is surrogate
if ( length-- > 0) {
surrogates(ch, chars[start++]);
} else {
fatalError("The character '"+(char)ch+"' is an invalid XML character");
}
continue;
}
if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
_printer.printText( ch );
else
printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
// use spaces between parts to break them into different
// lines. Spaces at beginning of line will be stripped
// by printing mechanism. Line terminator is treated
// no different than other text part.
while ( length-- > 0 ) {
ch = chars[start++];
if (!XML11Char.isXML11Valid(ch)) {
// check if it is surrogate
if ( length-- > 0) {
surrogates(ch, chars[start++]);
} else {
fatalError("The character '"+(char)ch+"' is an invalid XML character");
}
continue;
}
if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
_printer.printText( ch );
else
printXMLChar( ch );
}
}
}
/**
 * Resets this serializer by delegating to the superclass.
 *
 * @return the value reported by {@code super.reset()}
 */
public boolean reset() {
    // Pass the superclass result through instead of discarding it and
    // unconditionally reporting success.
    return super.reset();
}
}