org.jdom.output.Format Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jdom Show documentation
A complete, Java-based solution for accessing, manipulating, and outputting XML data
There is a newer version: 2.0.2
/*--

 $Id: Format.java,v 1.14 2009/07/23 05:54:23 jhunter Exp $

 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions, and the disclaimer that follows
    these conditions in the documentation and/or other materials
    provided with the distribution.

 3. The name "JDOM" must not be used to endorse or promote products
    derived from this software without prior written permission.  For
    written permission, please contact .

 4. Products derived from this software may not be called "JDOM", nor
    may "JDOM" appear in their name, without prior written permission
    from the JDOM Project Management .

 In addition, we request (but do not require) that you include in the
 end-user documentation provided with the redistribution and/or in the
 software itself an acknowledgement equivalent to the following:
     "This product includes software developed by the
      JDOM Project (http://www.jdom.org/)."
 Alternatively, the acknowledgment may be graphical using the logos
 available at http://www.jdom.org/images/logos.

 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 This software consists of voluntary contributions made by many
 individuals on behalf of the JDOM Project and was originally
 created by Jason Hunter  and
 Brett McLaughlin .  For more information
 on the JDOM Project, please see .

 */

package org.jdom.output;

import java.lang.reflect.Method;
import org.jdom.Verifier;

/**
 * Class to encapsulate XMLOutputter format options.
 * Typical users can use the standard format configurations obtained by
 * {@link #getRawFormat} (no whitespace changes),
 * {@link #getPrettyFormat} (whitespace beautification), and
 * {@link #getCompactFormat} (whitespace normalization).
 * 
 * Several modes are available to effect the way textual content is printed.
 * See the documentation for {@link TextMode} for details.
 *
 * @version $Revision: 1.14 $, $Date: 2009/07/23 05:54:23 $
 * @author Jason Hunter
 */
public class Format implements Cloneable {

    private static final String CVS_ID =
            "@(#) $RCSfile: Format.java,v $ $Revision: 1.14 $ $Date: 2009/07/23 05:54:23 $ $Name:  $";

    /**
     * Returns a new Format object that performs no whitespace changes, uses
     * the UTF-8 encoding, doesn't expand empty elements, includes the
     * declaration and encoding, and uses the default entity escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.

     * @return                     a Format with no whitespace changes
     */
    public static Format getRawFormat() {
        return new Format();
    }

    /**
     * Returns a new Format object that performs whitespace beautification with
     * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
     * includes the declaration and encoding, and uses the default entity
     * escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.
     *
     * @return                     a Format with whitespace beautification
     */
    public static Format getPrettyFormat() {
        Format f = new Format();
        f.setIndent(STANDARD_INDENT);
        f.setTextMode(TextMode.TRIM);
        return f;
    }

    /**
     * Returns a new Format object that performs whitespace normalization, uses
     * the UTF-8 encoding, doesn't expand empty elements, includes the
     * declaration and encoding, and uses the default entity escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.
     *
     * @return                     a Format with whitespace normalization
     */
    public static Format getCompactFormat() {
        Format f = new Format();
        f.setTextMode(TextMode.NORMALIZE);
        return f;
    }

    /** standard value to indent by, if we are indenting */
    private static final String STANDARD_INDENT = "  ";

    /** standard string with which to end a line */
    private static final String STANDARD_LINE_SEPARATOR = "\r\n";

    /** standard encoding */
    private static final String STANDARD_ENCODING = "UTF-8";


    /** The default indent is no spaces (as original document) */
    String indent = null;

    /** New line separator */
    String lineSeparator = STANDARD_LINE_SEPARATOR;

    /** The encoding format */
    String encoding = STANDARD_ENCODING;

    /** Whether or not to output the XML declaration
     * - default is false */
    boolean omitDeclaration = false;

    /** Whether or not to output the encoding in the XML declaration
     * - default is false */
    boolean omitEncoding = false;

    /** Whether or not to expand empty elements to
     * <tagName></tagName> - default is false */
    boolean expandEmptyElements = false;

    /** Whether TrAX output escaping disabling/enabling PIs are ignored
      * or processed - default is false */
    boolean ignoreTrAXEscapingPIs = false;

    /** text handling mode */
    TextMode mode = TextMode.PRESERVE;

    /** entity escape logic */
    EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);

    /**
     * Creates a new Format instance with default (raw) behavior.
     */
    private Format() { }

    /**
     * Sets the {@link EscapeStrategy} to use for character escaping.
     *
     * @param strategy the EscapeStrategy to use
     * @return a pointer to this Format for chaining
     */
    public Format setEscapeStrategy(EscapeStrategy strategy) {
        escapeStrategy = strategy;
        return this;
    }

    /**
     * Returns the current escape strategy
     *
     * @return the current escape strategy
     */
    public EscapeStrategy getEscapeStrategy() {
        return escapeStrategy;
    }

    /**
     * This will set the newline separator (lineSeparator).
     * The default is \r\n.  To make it output
     * the system default line ending string, call
     * setLineSeparator(System.getProperty("line.separator")).
     *
     * 

     * To output "UNIX-style" documents, call
     * setLineSeparator("\n").  To output "Mac-style"
     * documents, call setLineSeparator("\r").  DOS-style
     * documents use CR-LF ("\r\n"), which is the default.
     * 
     *
     * 
     * Note that this only applies to newlines generated by the
     * outputter.  If you parse an XML document that contains newlines
     * embedded inside a text node, and you do not set TextMode.NORMALIZE,
     * then the newlines will be output
     * verbatim, as "\n" which is how parsers normalize them.
     * 
     *
     * 
     * If the format's "indent" property is null (as is the default
     * for the Raw and Compact formats), then this value only effects the
     * newlines written after the declaration and doctype.
     * 
     *
     * @see #setTextMode
     *
     * @param separator String line separator to use.
     * @return a pointer to this Format for chaining
     */
    public Format setLineSeparator(String separator) {
        this.lineSeparator = separator;
        return this;
    }

    /**
     * Returns the current line separator.
     *
     * @return the current line separator
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * This will set whether the XML declaration
     * (<?xml version="1.0"
     * encoding="UTF-8"?>)
     * includes the encoding of the document. It is common to omit
     * this in uses such as WML and other wireless device protocols.
     *
     * @param omitEncoding boolean indicating whether or not
     *        the XML declaration should indicate the document encoding.
     * @return a pointer to this Format for chaining
     */
    public Format setOmitEncoding(boolean omitEncoding) {
        this.omitEncoding = omitEncoding;
        return this;
    }

    /**
     * Returns whether the XML declaration encoding will be omitted.
     *
     * @return whether the XML declaration encoding will be omitted
     */
    public boolean getOmitEncoding() {
        return omitEncoding;
    }

    /**
     * This will set whether the XML declaration
     * (<?xml version="1.0"?gt;)
     * will be omitted or not. It is common to omit this in uses such
     * as SOAP and XML-RPC calls.
     *
     * @param omitDeclaration boolean indicating whether or not
     *        the XML declaration should be omitted.
     * @return a pointer to this Format for chaining
     */
    public Format setOmitDeclaration(boolean omitDeclaration) {
        this.omitDeclaration = omitDeclaration;
        return this;
    }

    /**
     * Returns whether the XML declaration will be omitted.
     *
     * @return whether the XML declaration will be omitted
     */
    public boolean getOmitDeclaration() {
        return omitDeclaration;
    }

    /**
     * This will set whether empty elements are expanded from
     * <tagName/> to
     * <tagName></tagName>.
     *
     * @param expandEmptyElements boolean indicating whether or not
     *        empty elements should be expanded.
     * @return a pointer to this Format for chaining
     */
    public Format setExpandEmptyElements(boolean expandEmptyElements) {
        this.expandEmptyElements = expandEmptyElements;
        return this;
    }

    /**
     * Returns whether empty elements are expanded.
     *
     * @return whether empty elements are expanded
     */
    public boolean getExpandEmptyElements() {
        return expandEmptyElements;
    }

    /**
     * This will set whether JAXP TrAX processing instructions for
     * disabling/enabling output escaping are ignored.  Disabling
     * output escaping allows using XML text as element content and
     * outputing it verbatim, i.e. as element children would be.
     * 
     * When processed, these processing instructions are removed from
     * the generated XML text and control whether the element text
     * content is output verbatim or with escaping of the pre-defined
     * entities in XML 1.0.  The text to be output verbatim shall be
     * surrounded by the
     * <?javax.xml.transform.disable-output-escaping ?>
     * and <?javax.xml.transform.enable-output-escaping ?>
     * PIs.
     * 
     * When ignored, the processing instructions are present in the
     * generated XML text and the pre-defined entities in XML 1.0 are
     * escaped.
     * 

     * Default: false.
     *
     * @param ignoreTrAXEscapingPIs boolean indicating
     *        whether or not TrAX ouput escaping PIs are ignored.
     *
     * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
     * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
     */
    public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
        this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
    }

    /**
     * Returns whether JAXP TrAX processing instructions for
     * disabling/enabling output escaping are ignored.
     *
     * @return whether or not TrAX ouput escaping PIs are ignored.
     */
    public boolean getIgnoreTrAXEscapingPIs() {
        return ignoreTrAXEscapingPIs;
    }

    /**
     * This sets the text output style.  Options are available as static
     * {@link TextMode} instances.  The default is {@link TextMode#PRESERVE}.
     *
     * @return a pointer to this Format for chaining
     */
    public Format setTextMode(Format.TextMode mode) {
        this.mode = mode;
        return this;
    }

    /**
     * Returns the current text output style.
     *
     * @return the current text output style
     */
    public Format.TextMode getTextMode() {
        return mode;
    }

    /**
     * This will set the indent String to use; this
     * is usually a String of empty spaces. If you pass
     * the empty string (""), then no indentation will happen but newlines
     * will still be generated.  Passing null will result in no indentation
     * and no newlines generated.  Default: none (null)
     *
     * @param indent String to use for indentation.
     * @return a pointer to this Format for chaining
     */
    public Format setIndent(String indent) {
        this.indent = indent;
        return this;
    }

    /**
     * Returns the indent string in use.
     *
     * @return the indent string in use
     */
    public String getIndent() {
        return indent;
    }

    /**
     * Sets the output encoding.  The name should be an accepted XML
     * encoding.
     *
     * @param encoding the encoding format.  Use XML-style names like
     *                 "UTF-8" or "ISO-8859-1" or "US-ASCII"
     * @return a pointer to this Format for chaining
     */
    public Format setEncoding(String encoding) {
        this.encoding = encoding;
        escapeStrategy = new DefaultEscapeStrategy(encoding);
        return this;
    }

    /**
     * Returns the configured output encoding.
     *
     * @return the output encoding
     */
    public String getEncoding() {
        return encoding;
    }

    public Object clone() {
        Format format = null;

        try {
            format = (Format) super.clone();
        }
        catch (CloneNotSupportedException ce) {
        }

        return format;
    }


    /**
     * Handle common charsets quickly and easily.  Use reflection
     * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
     * If JDK 1.4 isn't around, default to no special encoding.
     */
    class DefaultEscapeStrategy implements EscapeStrategy {
        private int bits;
        Object encoder;
        Method canEncode;

        public DefaultEscapeStrategy(String encoding) {
            if ("UTF-8".equalsIgnoreCase(encoding) ||
                    "UTF-16".equalsIgnoreCase(encoding)) {
                bits = 16;
            }
            else if ("ISO-8859-1".equalsIgnoreCase(encoding) ||
                    "Latin1".equalsIgnoreCase(encoding)) {
                bits = 8;
            }
            else if ("US-ASCII".equalsIgnoreCase(encoding) ||
                    "ASCII".equalsIgnoreCase(encoding)) {
                bits = 7;
            }
            else {
                bits = 0;
                //encoder = Charset.forName(encoding).newEncoder();
                try {
                    Class charsetClass = Class.forName("java.nio.charset.Charset");
                    Class encoderClass = Class.forName("java.nio.charset.CharsetEncoder");
                    Method forName = charsetClass.getMethod("forName", new Class[]{String.class});
                    Object charsetObj = forName.invoke(null, new Object[]{encoding});
                    Method newEncoder = charsetClass.getMethod("newEncoder", null);
                    encoder = newEncoder.invoke(charsetObj, null);
                    canEncode = encoderClass.getMethod("canEncode", new Class[]{char.class});
                }
                catch (Exception ignored) {
                }
            }
        }

        public boolean shouldEscape(char ch) {
            if (bits == 16) {
                if (Verifier.isHighSurrogate(ch))
                    return true;  // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4
                else
                    return false;
            }
            if (bits == 8) {
                if ((int) ch > 255)
                    return true;
                else
                    return false;
            }
            if (bits == 7) {
                if ((int) ch > 127)
                    return true;
                else
                    return false;
            }
            else {
                if (Verifier.isHighSurrogate(ch))
                    return true;  // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4
                
                if (canEncode != null && encoder != null) {
                    try {
                        Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)});
                        return !val.booleanValue();
                    }
                    catch (Exception ignored) {
                    }
                }
                // Return false if we don't know.  This risks not escaping
                // things which should be escaped, but also means people won't
                // start getting loads of unnecessary escapes.
                return false;
            }
        }
    }


    /**
     * Class to signify how text should be handled on output.  The following
     * table provides details.
     *
     * 
     *   
     *     
     *     
     *   
     *
     *   
     *     
     *     
     *   
     *
     *   
     *     
     *     
     *   
     *
     *   
     *     
     *     
     *   
     *
     *   
     *     
     *     
     *   
     * 
     *       Text Mode
     *     
     *       Resulting behavior.
     *     

     *       PRESERVE (Default)
     *     
     *       All content is printed in the format it was created, no whitespace
     *       or line separators are are added or removed.
     *     

     *       TRIM_FULL_WHITE
     *     
     *       Content between tags consisting of all whitespace is not printed.
     *       If the content contains even one non-whitespace character, it is
     *       printed verbatim, whitespace and all.
     *     

     *       TRIM
     *     
     *       Same as TrimAllWhite, plus leading/trailing whitespace are
     *       trimmed.
     *     

     *       NORMALIZE
     *     
     *       Same as TextTrim, plus addition interior whitespace is compressed
     *       to a single space.
     *     
     *
     * In most cases textual content is aligned with the surrounding tags
     * (after the appropriate text mode is applied). In the case where the only
     * content between the start and end tags is textual, the start tag, text,
     * and end tag are all printed on the same line. If the document being
     * output already has whitespace, it's wise to turn on TRIM mode so the
     * pre-existing whitespace can be trimmed before adding new whitespace.
     * 
     * When a element has a xml:space attribute with the value of "preserve",
     * all formating is turned off and reverts back to the default until the
     * element and its contents have been printed. If a nested element contains
     * another xml:space with the value "default" formatting is turned back on
     * for the child element and then off for the remainder of the parent
     * element.
     */
    public static class TextMode {
        /**
         * Mode for literal text preservation.
         */
        public static final TextMode PRESERVE = new TextMode("PRESERVE");

        /**
         * Mode for text trimming (left and right trim).
         */
        public static final TextMode TRIM = new TextMode("TRIM");

        /**
         * Mode for text normalization (left and right trim plus internal
         * whitespace is normalized to a single space.
         * @see org.jdom.Element#getTextNormalize
         */
        public static final TextMode NORMALIZE = new TextMode("NORMALIZE");

        /**
         * Mode for text trimming of content consisting of nothing but
         * whitespace but otherwise not changing output.
         */
        public static final TextMode TRIM_FULL_WHITE =
                new TextMode("TRIM_FULL_WHITE");

        private final String name;

        private TextMode(String name) {
            this.name = name;
        }

        public String toString() {
            return name;
        }
    }
}
* Text Mode *	* Resulting behavior. *
* PRESERVE (Default) *	* All content is printed in the format it was created, no whitespace * or line separators are are added or removed. *
* TRIM_FULL_WHITE *	* Content between tags consisting of all whitespace is not printed. * If the content contains even one non-whitespace character, it is * printed verbatim, whitespace and all. *
* TRIM *	* Same as TrimAllWhite, plus leading/trailing whitespace are * trimmed. *
* NORMALIZE *	* Same as TextTrim, plus addition interior whitespace is compressed * to a single space. *