All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.geronimo.system.configuration.OutputFormat Maven / Gradle / Ivy

The newest version!
/**
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

/*
 * This code has been borrowed from the Apache Xerces project. We're copying the code to
 * keep from adding a dependency on Xerces in the Geronimo kernel.
 */

package org.apache.geronimo.system.configuration;

import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;


/**
 * Specifies an output format to control the serializer. Based on the
 * XSLT specification for output format, plus additional parameters.
 * Used to select the suitable serializer and determine how the
 * document should be formatted on output.
 * 

* The two interesting constructors are: *

    *
  • {@link #OutputFormat(String,String,boolean)} creates a format * for the specified method (XML, HTML, Text, etc), encoding and indentation *
  • {@link #OutputFormat(Document,String,boolean)} creates a format * compatible with the document type (XML, HTML, Text, etc), encoding and * indentation *
* * * @version $Revision: 476049 $ $Date: 2006-11-16 23:35:17 -0500 (Thu, 16 Nov 2006) $ * @author Assaf Arkin * Keith Visco * @see Serializer * @see Method */ public class OutputFormat { public static class DTD { /** * Public identifier for HTML document type. */ public static final String HTMLPublicId = "-//W3C//DTD HTML 4.0//EN"; /** * System identifier for HTML document type. */ public static final String HTMLSystemId = "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd"; /** * Public identifier for XHTML document type. */ public static final String XHTMLPublicId = "-//W3C//DTD XHTML 1.0 Strict//EN"; /** * System identifier for XHTML document type. */ public static final String XHTMLSystemId = "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd"; } public static class Defaults { /** * If indentation is turned on, the default identation * level is 4. * * @see #setIndenting(boolean) */ public static final int Indent = 4; /** * The default encoding for Web documents it UTF-8. * * @see #getEncoding() */ public static final String Encoding = "UTF-8"; /** * The default line width at which to break long lines * when identing. This is set to 72. */ public static final int LineWidth = 72; } /** * Holds the output method specified for this document, * or null if no method was specified. */ private String method; /** * Specifies the version of the output method. */ private String version; /** * The indentation level, or zero if no indentation * was requested. */ private int indent = 0; /** * The encoding to use, if an input stream is used. * The default is always UTF-8. */ private String encoding = Defaults.Encoding; /** * The EncodingInfo instance for _encoding. */ private EncodingInfo encodingInfo = null; /** * The specified media type or null. */ private String mediaType; /** * The specified document type system identifier, or null. */ private String doctypeSystem; /** * The specified document type public identifier, or null. */ private String doctypePublic; /** * Ture if the XML declaration should be ommited; */ private boolean omitXmlDeclaration = false; /** * Ture if the DOCTYPE declaration should be ommited; */ private boolean omitDoctype = false; /** * Ture if comments should be ommited; */ private boolean omitComments = false; /** * True if the document type should be marked as standalone. */ private boolean standalone = false; /** * List of element tag names whose text node children must * be output as CDATA. */ private String[] cdataElements; /** * List of element tag names whose text node children must * be output unescaped. */ private String[] nonEscapingElements; /** * The selected line separator. */ private String lineSeparator = "\n"; /** * The line width at which to wrap long lines when indenting. */ private int _lineWidth = Defaults.LineWidth; /** * True if spaces should be preserved in elements that do not * specify otherwise, or specify the default behavior. */ private boolean preserve = false; /** If true, an empty string valued attribute is output as "". If false and * and we are using the HTMLSerializer, then only the attribute name is * serialized. Defaults to false for backwards compatibility. */ private boolean preserveEmptyAttributes = false; /** * Constructs a new output format with the default values. */ public OutputFormat() { } /** * Constructs a new output format with the default values for * the specified method and encoding. If indent * is true, the document will be pretty printed with the default * indentation level and default line wrapping. * * @param method The specified output method * @param encoding The specified encoding * @param indenting True for pretty printing * @see #setEncoding * @see #setIndenting * @see #setMethod */ public OutputFormat( String method, String encoding, boolean indenting ) { setMethod( method ); setEncoding( encoding ); setIndenting( indenting ); } /** * Constructs a new output format with the proper method, * document type identifiers and media type for the specified * document. * * @param doc The document to output * @see #whichMethod */ public OutputFormat( Document doc ) { setMethod( whichMethod( doc ) ); setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) ); setMediaType( whichMediaType( getMethod() ) ); } /** * Constructs a new output format with the proper method, * document type identifiers and media type for the specified * document, and with the specified encoding. If indent * is true, the document will be pretty printed with the default * indentation level and default line wrapping. * * @param doc The document to output * @param encoding The specified encoding * @param indenting True for pretty printing * @see #setEncoding * @see #setIndenting * @see #whichMethod */ public OutputFormat( Document doc, String encoding, boolean indenting ) { this( doc ); setEncoding( encoding ); setIndenting( indenting ); } /** * Returns the method specified for this output format. * Typically the method will be xml, html * or text, but it might be other values. * If no method was specified, null will be returned * and the most suitable method will be determined for * the document by calling {@link #whichMethod}. * * @return The specified output method, or null */ public String getMethod() { return method; } /** * Sets the method for this output format. * * @see #getMethod * @param method The output method, or null */ public void setMethod( String method ) { this.method = method; } /** * Returns the version for this output method. * If no version was specified, will return null * and the default version number will be used. * If the serializerr does not support that particular * version, it should default to a supported version. * * @return The specified method version, or null */ public String getVersion() { return version; } /** * Sets the version for this output method. * For XML the value would be "1.0", for HTML * it would be "4.0". * * @see #getVersion * @param version The output method version, or null */ public void setVersion( String version ) { this.version = version; } /** * Returns the indentation specified. If no indentation * was specified, zero is returned and the document * should not be indented. * * @return The indentation or zero * @see #setIndenting */ public int getIndent() { return indent; } /** * Returns true if indentation was specified. */ public boolean getIndenting() { return ( indent > 0 ); } /** * Sets the indentation. The document will not be * indented if the indentation is set to zero. * Calling {@link #setIndenting} will reset this * value to zero (off) or the default (on). * * @param indent The indentation, or zero */ public void setIndent( int indent ) { if ( indent < 0 ) this.indent = 0; else this.indent = indent; } /** * Sets the indentation on and off. When set on, the default * indentation level and default line wrapping is used * (see {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH}). * To specify a different indentation level or line wrapping, * use {@link #setIndent} and {@link #setLineWidth}. * * @param on True if indentation should be on */ public void setIndenting( boolean on ) { if ( on ) { indent = Defaults.Indent; _lineWidth = Defaults.LineWidth; } else { indent = 0; _lineWidth = 0; } } /** * Returns the specified encoding. If no encoding was * specified, the default is always "UTF-8". * * @return The encoding */ public String getEncoding() { return encoding; } /** * Sets the encoding for this output method. If no * encoding was specified, the default is always "UTF-8". * Make sure the encoding is compatible with the one * used by the {@link java.io.Writer}. * * @see #getEncoding * @param encoding The encoding, or null */ public void setEncoding( String encoding ) { this.encoding = encoding; encodingInfo = null; } /** * Sets the encoding for this output method with an EncodingInfo * instance. */ public void setEncoding(EncodingInfo encInfo) { encoding = encInfo.getName(); encodingInfo = encInfo; } /** * Returns an EncodingInfo instance for the encoding. * * @see setEncoding */ public EncodingInfo getEncodingInfo() { if (encodingInfo == null) encodingInfo = Encodings.getEncodingInfo(encoding); return encodingInfo; } /** * Returns the specified media type, or null. * To determine the media type based on the * document type, use {@link #whichMediaType}. * * @return The specified media type, or null */ public String getMediaType() { return mediaType; } /** * Sets the media type. * * @see #getMediaType * @param mediaType The specified media type */ public void setMediaType( String mediaType ) { this.mediaType = mediaType; } /** * Sets the document type public and system identifiers. * Required only if the DOM Document or SAX events do not * specify the document type, and one must be present in * the serialized document. Any document type specified * by the DOM Document or SAX events will override these * values. * * @param publicId The public identifier, or null * @param systemId The system identifier, or null */ public void setDoctype( String publicId, String systemId ) { doctypePublic = publicId; doctypeSystem = systemId; } /** * Returns the specified document type public identifier, * or null. */ public String getDoctypePublic() { return doctypePublic; } /** * Returns the specified document type system identifier, * or null. */ public String getDoctypeSystem() { return doctypeSystem; } /** * Returns true if comments should be ommited. * The default is false. */ public boolean getOmitComments() { return omitComments; } /** * Sets comment omitting on and off. * * @param omit True if comments should be ommited */ public void setOmitComments( boolean omit ) { omitComments = omit; } /** * Returns true if the DOCTYPE declaration should * be ommited. The default is false. */ public boolean getOmitDocumentType() { return omitDoctype; } /** * Sets DOCTYPE declaration omitting on and off. * * @param omit True if DOCTYPE declaration should be ommited */ public void setOmitDocumentType( boolean omit ) { omitDoctype = omit; } /** * Returns true if the XML document declaration should * be ommited. The default is false. */ public boolean getOmitXMLDeclaration() { return omitXmlDeclaration; } /** * Sets XML declaration omitting on and off. * * @param omit True if XML declaration should be ommited */ public void setOmitXMLDeclaration( boolean omit ) { omitXmlDeclaration = omit; } /** * Returns true if the document type is standalone. * The default is false. */ public boolean getStandalone() { return standalone; } /** * Sets document DTD standalone. The public and system * identifiers must be null for the document to be * serialized as standalone. * * @param standalone True if document DTD is standalone */ public void setStandalone( boolean standalone ) { this.standalone = standalone; } /** * Returns a list of all the elements whose text node children * should be output as CDATA, or null if no such elements were * specified. */ public String[] getCDataElements() { return cdataElements; } /** * Returns true if the text node children of the given elements * should be output as CDATA. * * @param tagName The element's tag name * @return True if should serialize as CDATA */ public boolean isCDataElement( String tagName ) { int i; if ( cdataElements == null ) return false; for ( i = 0 ; i < cdataElements.length ; ++i ) if ( cdataElements[ i ].equals( tagName ) ) return true; return false; } /** * Sets the list of elements for which text node children * should be output as CDATA. * * @param cdataElements List of CDATA element tag names */ public void setCDataElements( String[] cdataElements ) { this.cdataElements = cdataElements; } /** * Returns a list of all the elements whose text node children * should be output unescaped (no character references), or null * if no such elements were specified. */ public String[] getNonEscapingElements() { return nonEscapingElements; } /** * Returns true if the text node children of the given elements * should be output unescaped. * * @param tagName The element's tag name * @return True if should serialize unescaped */ public boolean isNonEscapingElement( String tagName ) { int i; if ( nonEscapingElements == null ) return false; for ( i = 0 ; i < nonEscapingElements.length ; ++i ) if ( nonEscapingElements[ i ].equals( tagName ) ) return true; return false; } /** * Sets the list of elements for which text node children * should be output unescaped (no character references). * * @param nonEscapingElements List of unescaped element tag names */ public void setNonEscapingElements( String[] nonEscapingElements ) { this.nonEscapingElements = nonEscapingElements; } /** * Returns a specific line separator to use. The default is the * Web line separator (\n). A string is returned to * support double codes (CR + LF). * * @return The specified line separator */ public String getLineSeparator() { return lineSeparator; } /** * Sets the line separator. The default is the Web line separator * (\n). The machine's line separator can be obtained * from the system property line.separator, but is only * useful if the document is edited on machines of the same type. * For general documents, use the Web line separator. * * @param lineSeparator The specified line separator */ public void setLineSeparator( String lineSeparator ) { if ( lineSeparator == null ) this.lineSeparator = "\n"; else this.lineSeparator = lineSeparator; } /** * Returns true if the default behavior for this format is to * preserve spaces. All elements that do not specify otherwise * or specify the default behavior will be formatted based on * this rule. All elements that specify space preserving will * always preserve space. */ public boolean getPreserveSpace() { return preserve; } /** * Sets space preserving as the default behavior. The default is * space stripping and all elements that do not specify otherwise * or use the default value will not preserve spaces. * * @param preserve True if spaces should be preserved */ public void setPreserveSpace( boolean preserve ) { this.preserve = preserve; } /** * Return the selected line width for breaking up long lines. * When indenting, and only when indenting, long lines will be * broken at space boundaries based on this line width. * No line wrapping occurs if this value is zero. */ public int getLineWidth() { return _lineWidth; } /** * Sets the line width. If zero then no line wrapping will * occur. Calling {@link #setIndenting} will reset this * value to zero (off) or the default (on). * * @param lineWidth The line width to use, zero for default * @see #getLineWidth * @see #setIndenting */ public void setLineWidth( int lineWidth ) { if ( lineWidth <= 0 ) _lineWidth = 0; else _lineWidth = lineWidth; } /** * Returns the preserveEmptyAttribute flag. If flag is false, then' * attributes with empty string values are output as the attribute * name only (in HTML mode). * @return preserve the preserve flag */ public boolean getPreserveEmptyAttributes () { return preserveEmptyAttributes; } /** * Sets the preserveEmptyAttribute flag. If flag is false, then' * attributes with empty string values are output as the attribute * name only (in HTML mode). * @param preserve the preserve flag */ public void setPreserveEmptyAttributes (boolean preserve) { preserveEmptyAttributes = preserve; } /** * Returns the last printable character based on the selected * encoding. Control characters and non-printable characters * are always printed as character references. */ public char getLastPrintable() { if ( getEncoding() != null && ( getEncoding().equalsIgnoreCase( "ASCII" ) ) ) return 0xFF; else return 0xFFFF; } /** * Determine the output method for the specified document. * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument} * then the method is said to be html. If the root * element is 'html' and all text nodes preceding the root * element are all whitespace, then the method is said to be * html. Otherwise the method is xml. * * @param doc The document to check * @return The suitable method */ public static String whichMethod( Document doc ) { Node node; String value; int i; // If document is derived from HTMLDocument then the default // method is html. if ( doc instanceof HTMLDocument ) return Method.HTML; // Lookup the root element and the text nodes preceding it. // If root element is html and all text nodes contain whitespace // only, the method is html. // FIXME (SM) should we care about namespaces here? node = doc.getFirstChild(); while (node != null) { // If the root element is html, the method is html. if ( node.getNodeType() == Node.ELEMENT_NODE ) { if ( node.getNodeName().equalsIgnoreCase( "html" ) ) { return Method.HTML; } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) { return Method.FOP; } else { return Method.XML; } } else if ( node.getNodeType() == Node.TEXT_NODE ) { // If a text node preceding the root element contains // only whitespace, this might be html, otherwise it's // definitely xml. value = node.getNodeValue(); for ( i = 0 ; i < value.length() ; ++i ) if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A && value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D ) return Method.XML; } node = node.getNextSibling(); } // Anything else, the method is xml. return Method.XML; } /** * Returns the document type public identifier * specified for this document, or null. */ public static String whichDoctypePublic( Document doc ) { DocumentType doctype; /* DOM Level 2 was introduced into the code base*/ doctype = doc.getDoctype(); if ( doctype != null ) { // Note on catch: DOM Level 1 does not specify this method // and the code will throw a NoSuchMethodError try { return doctype.getPublicId(); } catch ( Error except ) { } } if ( doc instanceof HTMLDocument ) return DTD.XHTMLPublicId; return null; } /** * Returns the document type system identifier * specified for this document, or null. */ public static String whichDoctypeSystem( Document doc ) { DocumentType doctype; /* DOM Level 2 was introduced into the code base*/ doctype = doc.getDoctype(); if ( doctype != null ) { // Note on catch: DOM Level 1 does not specify this method // and the code will throw a NoSuchMethodError try { return doctype.getSystemId(); } catch ( Error except ) { } } if ( doc instanceof HTMLDocument ) return DTD.XHTMLSystemId; return null; } /** * Returns the suitable media format for a document * output with the specified method. */ public static String whichMediaType( String method ) { if ( method.equalsIgnoreCase( Method.XML ) ) return "text/xml"; if ( method.equalsIgnoreCase( Method.HTML ) ) return "text/html"; if ( method.equalsIgnoreCase( Method.XHTML ) ) return "text/html"; if ( method.equalsIgnoreCase( Method.TEXT ) ) return "text/plain"; if ( method.equalsIgnoreCase( Method.FOP ) ) return "application/pdf"; return null; } }