com.android.ide.common.xml.XmlPrettyPrinter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sdk-common Show documentation
sdk-common library used by other Android tools libraries.
There is a newer version: 25.3.0
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.ide.common.xml;

import com.android.SdkConstants;
import com.android.annotations.NonNull;
import com.android.annotations.Nullable;
import com.android.resources.ResourceFolderType;
import com.android.utils.SdkUtils;
import com.android.utils.XmlUtils;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.io.Files;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import static com.android.SdkConstants.DOT_XML;
import static com.android.SdkConstants.TAG_COLOR;
import static com.android.SdkConstants.TAG_DIMEN;
import static com.android.SdkConstants.TAG_ITEM;
import static com.android.SdkConstants.TAG_STRING;
import static com.android.SdkConstants.TAG_STYLE;
import static com.android.SdkConstants.XMLNS;
import static com.android.utils.XmlUtils.XML_COMMENT_BEGIN;
import static com.android.utils.XmlUtils.XML_COMMENT_END;
import static com.android.utils.XmlUtils.XML_PROLOG;

/**
 * Visitor which walks over the subtree of the DOM to be formatted and pretty prints
 * the DOM into the given {@link StringBuilder}
 */
public class XmlPrettyPrinter {

    /** The style to print the XML in */
    private final XmlFormatStyle mStyle;

    /** Formatting preferences to use when formatting the XML */
    private final XmlFormatPreferences mPrefs;
    /** Start node to start formatting at */
    private Node mStartNode;
    /** End node to stop formatting after */
    private Node mEndNode;
    /** Whether the visitor is currently in range */
    private boolean mInRange;
    /** Output builder */
    @SuppressWarnings("StringBufferField")
    private StringBuilder mOut;
    /** String to insert for a single indentation level */
    private String mIndentString;
    /** Line separator to use */
    private String mLineSeparator;
    /** If true, we're only formatting an open tag */
    private boolean mOpenTagOnly;
    /** List of indentation to use for each given depth */
    private String[] mIndentationLevels;
    /** Whether the formatter should end the document with a newline */
    private boolean mEndWithNewline;

    /**
     * Constructs a pretty printer for the given preferences and style.
     *
     * @param prefs the formatting preferences to apply
     * @param style the XML format style to apply
     * @param lineSeparator the line separator to emit, such as "\n"; when null, the
     *            separator returned by {@link SdkUtils#getLineSeparator()} is used
     */
    public XmlPrettyPrinter(XmlFormatPreferences prefs, XmlFormatStyle style,
            String lineSeparator) {
        mStyle = style;
        mPrefs = prefs;
        mLineSeparator = lineSeparator != null ? lineSeparator : SdkUtils.getLineSeparator();
    }

    /**
     * Configures whether the formatted document should end with a line separator.
     *
     * @param endWithNewline true to request a trailing line separator
     * @return this printer, so that calls can be chained
     */
    public XmlPrettyPrinter setEndWithNewline(boolean endWithNewline) {
        this.mEndWithNewline = endWithNewline;
        return this;
    }

    /**
     * Sets the indentation strings to use, one per depth (the array is indexed by
     * depth).
     *
     * @param indentationLevels the indentation string for each indentation level;
     *            the array is stored as-is, not copied
     */
    public void setIndentationLevels(String[] indentationLevels) {
        this.mIndentationLevels = indentationLevels;
    }

    /** Returns the line separator this printer was configured with. */
    @NonNull
    private String getLineSeparator() {
        return this.mLineSeparator;
    }

    /**
     * Formats the given XML text. The text must be well-formed; if it cannot be
     * parsed, it is returned unchanged.
     * <p>
     * The result ends with a line separator only if the input did.
     *
     * @param xml the XML content to format
     * @param prefs the preferences to format with
     * @param style the style to format with
     * @param lineSeparator the line separator to use, such as "\n" (can be null, in which
     *     case the default from {@link SdkUtils#getLineSeparator()} is used)
     * @return the formatted document, or the unmodified input if it could not be parsed
     */
    @NonNull
    public static String prettyPrint(
            @NonNull String xml,
            @NonNull XmlFormatPreferences prefs,
            @NonNull XmlFormatStyle style,
            @Nullable String lineSeparator) {
        Document document = XmlUtils.parseDocumentSilently(xml, true);
        if (document == null) {
            // Parser error: hand back the content untouched
            return xml;
        }

        XmlPrettyPrinter printer = new XmlPrettyPrinter(prefs, style, lineSeparator);
        // Mirror the input's convention for a trailing line separator
        boolean inputEndsWithNewline = xml.endsWith(printer.getLineSeparator());
        printer.setEndWithNewline(inputEndsWithNewline);

        // Formatted output is usually somewhat larger than the input
        int initialCapacity = 3 * xml.length() / 2;
        StringBuilder output = new StringBuilder(initialCapacity);
        printer.prettyPrint(-1, document, null, null, output, false /*openTagOnly*/);
        return output.toString();
    }

    /**
     * Formats the given node without requesting a trailing newline.
     *
     * @param node the node, usually a document, to be printed
     * @param prefs the formatting preferences
     * @param style the formatting style to use
     * @param lineSeparator the line separator to use, or null to use the
     *            default
     * @return a formatted string
     * @deprecated Use {@link #prettyPrint(org.w3c.dom.Node, XmlFormatPreferences,
     *      XmlFormatStyle, String, boolean)} instead
     */
    @NonNull
    @Deprecated
    public static String prettyPrint(
            @NonNull Node node,
            @NonNull XmlFormatPreferences prefs,
            @NonNull XmlFormatStyle style,
            @Nullable String lineSeparator) {
        // This legacy entry point never asks for a trailing newline
        final boolean endWithNewline = false;
        return prettyPrint(node, prefs, style, lineSeparator, endWithNewline);
    }

    /**
     * Pretty prints the given node
     *
     * @param node the node, usually a document, to be printed
     * @param prefs the formatting preferences
     * @param style the formatting style to use
     * @param lineSeparator the line separator to use, or null to use the
     *            default
     * @param endWithNewline if true, ensure that the printed output ends with a newline
     * @return a formatted string
     */
    @NonNull
    public static String prettyPrint(
            @NonNull Node node,
            @NonNull XmlFormatPreferences prefs,
            @NonNull XmlFormatStyle style,
            @Nullable String lineSeparator,
            boolean endWithNewline) {
        XmlPrettyPrinter printer = new XmlPrettyPrinter(prefs, style, lineSeparator);
        printer.setEndWithNewline(endWithNewline);
        StringBuilder sb = new StringBuilder(1000);
        printer.prettyPrint(-1, node, null, null, sb, false /*openTagOnly*/);
        String xml = sb.toString();
        if (node.getNodeType() == Node.DOCUMENT_NODE && !xml.startsWith("').append(mLineSeparator);
    }

    /**
     * Returns the original source text for the given node, if available.
     * <p>
     * This default implementation always returns null. It is an instance method
     * so that subclasses can override it. The doctype and text printers in this
     * class call it: a non-null result is written as the doctype, or used in
     * place of the DOM text value (and then treated as already escaped).
     *
     * @param node the node to look up source text for
     * @return the source text, or null if it is not known
     */
    @Nullable
    @SuppressWarnings("MethodMayBeStatic") // Intentionally instance method so it can be overridden
    protected String getSource(@NonNull Node node) {
        return null;
    }

    /**
     * Emits the document type declaration for the given node, followed by a line
     * separator. The declaration text comes from {@link #getSource(Node)}; when
     * no source is available nothing is written.
     */
    private void printDocType(Node node) {
        String source = getSource(node);
        if (source == null) {
            return;
        }
        mOut.append(source).append(mLineSeparator);
    }

    /**
     * Writes the given character data node as a CDATA section.
     * <p>
     * If the node's value contains a newline, the section is placed on its own
     * line: a line separator is written before it (unless the output already
     * ends with one) and another after it.
     *
     * @param node the character data node whose value should be written
     */
    private void printCharacterData(Node node) {
        String nodeValue = node.getNodeValue();
        boolean separateLine = nodeValue.indexOf('\n') != -1;
        if (separateLine && !endsWithLineSeparator()) {
            mOut.append(mLineSeparator);
        }
        // Write the value verbatim between the CDATA delimiters; appending an
        // empty string here would silently drop the section's content
        mOut.append("<![CDATA["); //$NON-NLS-1$
        mOut.append(nodeValue);
        mOut.append("]]>");       //$NON-NLS-1$
        if (separateLine) {
            mOut.append(mLineSeparator);
        }
    }

    /**
     * Writes the given text node to the output.
     * <p>
     * If {@link #getSource(Node)} supplies the original source for the node, that
     * source is written as-is (it is assumed to be escaped already); otherwise the
     * DOM value is written with XML text escaping. Blank lines at the start and
     * end of the text are stripped. Whitespace-only nodes produce no output,
     * except for a single space when the node sits between two elements and the
     * left one is a markup element.
     *
     * @param node the text node to print
     */
    private void printText(Node node) {
        boolean escape = true;
        String text = node.getNodeValue();

        String source = getSource(node);
        if (source != null) {
            // Get the original source string. This will contain the actual entities
            // such as "&gt;" instead of ">" which it gets turned into for the DOM nodes.
            // By operating on source we can preserve the user's entities rather than
            // having &gt; for example always turned into >.
            text = source;
            escape = false;
        }

        // Most text nodes are just whitespace for formatting (which we're replacing)
        // so look for actual text content and extract that part out
        String trimmed = text.trim();
        if (!trimmed.isEmpty()) {
            // TODO: Reformat the contents if it is too wide?

            // Note that we append the actual text content, NOT the trimmed content,
            // since the whitespace may be significant, e.g.
            // <string name="toast_sync_error">Sync error: <xliff:g id="error">%1$s</xliff:g>...

            // However, we should remove all blank lines in the prefix and suffix of the
            // text node, or we will end up inserting additional blank lines each time you're
            // formatting a text node within an outer element (which also adds spacing lines)

            // Index of the last newline within the leading whitespace, or -1 if none
            int lastPrefixNewline = -1;
            for (int i = 0, n = text.length(); i < n; i++) {
                char c = text.charAt(i);
                if (c == '\n') {
                    lastPrefixNewline = i;
                } else if (!Character.isWhitespace(c)) {
                    break;
                }
            }
            // Index of the first newline within the trailing whitespace, or -1 if none
            int firstSuffixNewline = -1;
            for (int i = text.length() - 1; i >= 0; i--) {
                char c = text.charAt(i);
                if (c == '\n') {
                    firstSuffixNewline = i;
                } else if (!Character.isWhitespace(c)) {
                    break;
                }
            }
            if (lastPrefixNewline != -1 || firstSuffixNewline != -1) {
                boolean stripSuffix;
                if (firstSuffixNewline == -1) {
                    // No trailing newline: keep everything up to the end of the text
                    firstSuffixNewline = text.length();
                    stripSuffix = false;
                } else {
                    stripSuffix = true;
                }

                int stripFrom = lastPrefixNewline + 1;
                if (firstSuffixNewline >= stripFrom) {
                    text = text.substring(stripFrom, firstSuffixNewline);

                    // In markup strings we may need to preserve spacing on the left and/or
                    // right if we're next to a markup string on the given side
                    if (lastPrefixNewline != -1) {
                        Node left = node.getPreviousSibling();
                        if (left != null && left.getNodeType() == Node.ELEMENT_NODE
                                && isMarkupElement((Element) left)) {
                            text = ' ' + text;
                        }
                    }
                    if (stripSuffix) {
                        Node right = node.getNextSibling();
                        if (right != null && right.getNodeType() == Node.ELEMENT_NODE
                                && isMarkupElement((Element) right)) {
                            text += ' ';
                        }
                    }
                }
            }

            if (escape) {
                XmlUtils.appendXmlTextValue(mOut, text);
            } else {
                // Text is already escaped
                mOut.append(text);
            }

            // Outside of resource files, text content is followed by a line separator
            if (mStyle != XmlFormatStyle.RESOURCE) {
                mOut.append(mLineSeparator);
            }
        } else {
            // Ensure that if we're in the middle of a markup string, we preserve spacing.
            // In other words, "<b>first</b> <b>second</b>" - we don't want that middle
            // space to disappear, but we do want repeated spaces to collapse into one.
            Node left = node.getPreviousSibling();
            Node right = node.getNextSibling();
            if (left != null && right != null
                    && left.getNodeType() == Node.ELEMENT_NODE
                    && right.getNodeType() == Node.ELEMENT_NODE
                    && isMarkupElement((Element)left)) {
                mOut.append(' ');
            }
        }
    }

    /**
     * Writes the given comment node to the output.
     * <p>
     * A single-line comment that followed an element tag on the same line in the
     * original document is kept as a suffix comment on that line. Other comments
     * are indented to the given depth. Multi-line comments have blank lines at the
     * start and end stripped and are written with the comment delimiters on their
     * own lines. Unless the preferences ask for empty lines to be removed, blank
     * lines that surrounded the comment in the original document are preserved.
     *
     * @param depth the indentation depth to print the comment at
     * @param node the comment node to print
     */
    private void printComment(int depth, Node node) {
        String comment = node.getNodeValue();
        boolean multiLine = comment.indexOf('\n') != -1;
        String trimmed = comment.trim();

        // See if this is an "end-of-the-line" comment, e.g. it is not a multi-line
        // comment and it appears on the same line as an opening or closing element tag;
        // if so, continue to place it as a suffix comment
        boolean isSuffixComment = false;
        if (!multiLine) {
            Node previous = node.getPreviousSibling();
            isSuffixComment = true;
            if (previous == null && node.getParentNode().getNodeType() == Node.DOCUMENT_NODE) {
                isSuffixComment = false;
            }
            // Walk backwards over preceding text nodes: a comment or a newline before
            // this comment means that it is not on the same line as a tag
            while (previous != null) {
                short type = previous.getNodeType();
                if (type == Node.COMMENT_NODE) {
                    isSuffixComment = false;
                    break;
                } else if (type == Node.TEXT_NODE) {
                    if (previous.getNodeValue().indexOf('\n') != -1) {
                        isSuffixComment = false;
                        break;
                    }
                } else {
                    break;
                }
                previous = previous.getPreviousSibling();
            }
            if (isSuffixComment) {
                // Remove newline added by element open tag or element close tag
                if (endsWithLineSeparator()) {
                    removeLastLineSeparator();
                }
                mOut.append(' ');
            }
        }

        // Put the comment on a line on its own? Only if it was separated by a blank line
        // in the previous version of the document. In other words, if the document
        // adds blank lines between comments this formatter will preserve that fact, and vice
        // versa for a tightly formatted document it will preserve that convention as well.
        if (!mPrefs.removeEmptyLines && !isSuffixComment) {
            Node curr = node.getPreviousSibling();
            if (curr == null) {
                if (mOut.length() > 0 && !endsWithLineSeparator()) {
                    mOut.append(mLineSeparator);
                }
            } else if (curr.getNodeType() == Node.TEXT_NODE) {
                String text = curr.getNodeValue();
                // Count how many newlines we find in the trailing whitespace of the
                // text node
                int newLines = 0;
                for (int i = text.length() - 1; i >= 0; i--) {
                    char c = text.charAt(i);
                    if (Character.isWhitespace(c)) {
                        if (c == '\n') {
                            newLines++;
                            if (newLines == 2) {
                                break;
                            }
                        }
                    } else {
                        break;
                    }
                }
                if (newLines >= 2) {
                    mOut.append(mLineSeparator);
                } else if (text.trim().isEmpty() && curr.getPreviousSibling() == null) {
                    // Comment before first child in node
                    mOut.append(mLineSeparator);
                }
            }
        }


        // TODO: Reformat the comment text?
        if (!multiLine) {
            if (!isSuffixComment) {
                indent(depth);
            }
            mOut.append(XML_COMMENT_BEGIN).append(' ');
            mOut.append(trimmed);
            mOut.append(' ').append(XML_COMMENT_END);
            mOut.append(mLineSeparator);
        } else {
            // Strip off blank lines at the beginning and end of the comment text.
            // Find last newline at the beginning of the text:
            int index = 0;
            int end = comment.length();
            int recentNewline = -1;
            while (index < end) {
                char c = comment.charAt(index);
                if (c == '\n') {
                    recentNewline = index;
                }
                if (!Character.isWhitespace(c)) {
                    break;
                }
                index++;
            }

            int start = recentNewline + 1;

            // Find last newline at the end of the text
            index = end - 1;
            recentNewline = -1;
            while (index > start) {
                char c = comment.charAt(index);
                if (c == '\n') {
                    recentNewline = index;
                }
                if (!Character.isWhitespace(c)) {
                    break;
                }
                index--;
            }

            end = recentNewline == -1 ? index + 1 : recentNewline;
            if (start >= end) {
                // It's a blank comment like <!-- \n\n--> - just clean it up
                if (!isSuffixComment) {
                    indent(depth);
                }
                mOut.append(XML_COMMENT_BEGIN).append(' ').append(XML_COMMENT_END);
                mOut.append(mLineSeparator);
                return;
            }

            trimmed = comment.substring(start, end);

            // When stripping out prefix and suffix blank lines we might have ended up
            // with a single line comment again so check and format single line comments
            // without newlines inside the <!-- --> delimiters
            multiLine = trimmed.indexOf('\n') != -1;
            if (multiLine) {
                indent(depth);
                mOut.append(XML_COMMENT_BEGIN);
                mOut.append(mLineSeparator);

                // See if we need to add extra spacing to keep alignment. Consider a comment
                // like this:
                // <!-- Deprecated strings - Move the identifiers to this section,
                //      and remove the actual text. -->
                // This String will be
                // " Deprecated strings - Move the identifiers to this section,\n" +
                // "     and remove the actual text. -->"
                // where the left side column no longer lines up.
                // To fix this, we need to insert some extra whitespace into the first line
                // of the string; in particular, the exact number of characters that the
                // first line of the comment was indented with!

                // However, if the comment started like this:
                // <!--
                // /** Copyright
                // -->
                // then obviously the align-indent is 0, so we only want to compute an
                // align indent when we don't find a newline before the content
                boolean startsWithNewline = false;
                for (int i = 0; i < start; i++) {
                    if (comment.charAt(i) == '\n') {
                        startsWithNewline = true;
                        break;
                    }
                }
                if (!startsWithNewline) {
                    Node previous = node.getPreviousSibling();
                    if (previous != null && previous.getNodeType() == Node.TEXT_NODE) {
                        String prevText = previous.getNodeValue();
                        // Column where the comment text started in the original document:
                        // width of the comment opener plus the whitespace preceding it on
                        // its line (tabs counted using the configured tab width)
                        int indentation = XML_COMMENT_BEGIN.length();
                        for (int i = prevText.length() - 1; i >= 0; i--) {
                            char c = prevText.charAt(i);
                            if (c == '\n') {
                                break;
                            } else {
                                indentation += (c == '\t') ? mPrefs.getTabWidth() : 1;
                            }
                        }

                        // See if the next line after the newline has indentation; if it doesn't,
                        // leave things alone. This fixes a case like this:
                        //     <!-- This is the
                        //     comment block -->
                        // such that it doesn't turn it into
                        //     <!--
                        //          This is the
                        //     comment block
                        //     -->
                        // In this case we instead want
                        //     <!--
                        //     This is the
                        //     comment block
                        //     -->
                        int minIndent = Integer.MAX_VALUE;
                        String[] lines = trimmed.split("\n"); //$NON-NLS-1$
                        // Skip line 0 since we know that it doesn't start with a newline
                        for (int i = 1; i < lines.length; i++) {
                            int indent = 0;
                            String line = lines[i];
                            for (int j = 0; j < line.length(); j++) {
                                char c = line.charAt(j);
                                if (!Character.isWhitespace(c)) {
                                    // Only set minIndent if there's text content on the line;
                                    // blank lines can exist in the comment without affecting
                                    // the overall minimum indentation boundary.
                                    if (indent < minIndent) {
                                        minIndent = indent;
                                    }
                                    break;
                                } else {
                                    indent += (c == '\t') ? mPrefs.getTabWidth() : 1;
                                }
                            }
                        }

                        if (minIndent < indentation) {
                            indentation = minIndent;

                            // Subtract any indentation that is already present on the line
                            String line = lines[0];
                            for (int j = 0; j < line.length(); j++) {
                                char c = line.charAt(j);
                                if (!Character.isWhitespace(c)) {
                                    break;
                                } else {
                                    indentation -= (c == '\t') ? mPrefs.getTabWidth() : 1;
                                }
                            }
                        }

                        // Positive indentation: pad the first line with spaces
                        for (int i = 0; i < indentation; i++) {
                            mOut.append(' ');
                        }

                        // Negative indentation: the first line is indented too far, so drop
                        // that many leading characters, but only if they are all whitespace
                        if (indentation < 0) {
                            boolean prefixIsSpace = true;
                            for (int i = 0; i < -indentation && i < trimmed.length(); i++) {
                                if (!Character.isWhitespace(trimmed.charAt(i))) {
                                    prefixIsSpace = false;
                                    break;
                                }
                            }
                            if (prefixIsSpace) {
                                trimmed = trimmed.substring(-indentation);
                            }
                        }
                    }
                }

                mOut.append(trimmed);
                mOut.append(mLineSeparator);
                indent(depth);
                mOut.append(XML_COMMENT_END);
                mOut.append(mLineSeparator);
            } else {
                // NOTE(review): unlike the other branches, this one does not call
                // indent(depth) before writing the comment - confirm whether that is
                // intentional
                mOut.append(XML_COMMENT_BEGIN).append(' ');
                mOut.append(trimmed);
                mOut.append(' ').append(XML_COMMENT_END);
                mOut.append(mLineSeparator);
            }
        }

        // Preserve whitespace after comment: See if the original document had two or
        // more newlines after the comment, and if so have a blank line between this
        // comment and the next
        Node next = node.getNextSibling();
        if (!mPrefs.removeEmptyLines && (next != null)
                && (next.getNodeType() == Node.TEXT_NODE)) {
            String text = next.getNodeValue();
            int newLinesBeforeText = 0;
            for (int i = 0, n = text.length(); i < n; i++) {
                char c = text.charAt(i);
                if (c == '\n') {
                    newLinesBeforeText++;
                    if (newLinesBeforeText == 2) {
                        // Yes
                        mOut.append(mLineSeparator);
                        break;
                    }
                } else if (!Character.isWhitespace(c)) {
                    break;
                }
            }
        }
    }

    /**
     * Checks whether the output built so far ends with the line separator.
     * <p>
     * Every character of the separator is compared against the matching
     * character at the tail of the output, so multi-character separators such
     * as "\r\n" are handled correctly.
     *
     * @return true if the output ends with the line separator. Also true when
     *         the output is shorter than the separator (for example when it is
     *         still empty), so that callers do not add a line separator at the
     *         very start of the output.
     */
    private boolean endsWithLineSeparator() {
        int separatorLength = mLineSeparator.length();
        int tailStart = mOut.length() - separatorLength;
        if (tailStart < 0) {
            // Too short to contain the separator; treated as "already terminated"
            return true;
        }

        // The output index must advance together with the separator index
        for (int i = 0; i < separatorLength; i++) {
            if (mOut.charAt(tailStart + i) != mLineSeparator.charAt(i)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Truncates the output by the length of the line separator. Does nothing
     * when the output is shorter than the separator. The tail is not checked,
     * so callers should first confirm that the output ends with a separator.
     */
    private void removeLastLineSeparator() {
        int truncatedLength = mOut.length() - mLineSeparator.length();
        if (truncatedLength < 0) {
            return;
        }
        mOut.setLength(truncatedLength);
    }

    /**
     * Writes the opening tag of the given element, including its attributes.
     * <p>
     * Attributes are sorted with the comparator from the preferences, if there is
     * one. They are written on a single line in resource files, or when there is
     * exactly one attribute and the preferences ask for it to stay on the first
     * line; otherwise each attribute goes on its own line, indented one level
     * deeper than the element. An empty element is written as a self-closing tag.
     *
     * @param depth the indentation depth of the element
     * @param node the element node to print; must be an {@link Element}
     */
    private void printOpenElementTag(int depth, Node node) {
        Element element = (Element) node;
        if (newlineBeforeElementOpen(element, depth)) {
            mOut.append(mLineSeparator);
        }
        if (indentBeforeElementOpen(element, depth)) {
            indent(depth);
        }
        mOut.append('<').append(element.getTagName());

        NamedNodeMap attributes = element.getAttributes();
        int attributeCount = attributes.getLength();
        if (attributeCount > 0) {
            // Sort the attributes. The list must be typed: its elements are used
            // as Attr instances below.
            List<Attr> attributeList = new ArrayList<Attr>(attributeCount);
            for (int i = 0; i < attributeCount; i++) {
                attributeList.add((Attr) attributes.item(i));
            }
            Comparator<? super Attr> comparator = mPrefs.getAttributeComparator();
            if (comparator != null) {
                Collections.sort(attributeList, comparator);
            }

            // Put the single attribute on the same line as the element tag?
            boolean singleLine = (mPrefs.oneAttributeOnFirstLine && attributeCount == 1)
                    // In resource files we always put all the attributes (which is
                    // usually just zero, one or two) on the same line
                    || mStyle == XmlFormatStyle.RESOURCE;

            // We also place the namespace declaration on the same line as the root element,
            // but this doesn't also imply singleLine handling; subsequent attributes end up
            // on their own lines
            boolean indentNextAttribute;
            if (singleLine || (depth == 0 && XMLNS.equals(attributeList.get(0).getPrefix()))) {
                mOut.append(' ');
                indentNextAttribute = false;
            } else {
                mOut.append(mLineSeparator);
                indentNextAttribute = true;
            }

            Attr last = attributeList.get(attributeCount - 1);
            for (Attr attribute : attributeList) {
                if (indentNextAttribute) {
                    indent(depth + 1);
                }
                mOut.append(attribute.getName());
                mOut.append('=').append('"');
                XmlUtils.appendXmlAttributeValue(mOut, attribute.getValue());
                mOut.append('"');

                // Don't add a newline at the last attribute line; the > should
                // immediately follow the last attribute
                if (attribute != last) {
                    mOut.append(singleLine ? " " : mLineSeparator); //$NON-NLS-1$
                    indentNextAttribute = !singleLine;
                }
            }
        }

        boolean isClosed = isEmptyTag(element);

        // Add a space before the > or /> ? In resource files, only do this when closing the
        // element
        if (mPrefs.spaceBeforeClose && (mStyle != XmlFormatStyle.RESOURCE || isClosed)
                // in <selector> files etc still treat the <item> entries as in resource files
                && !TAG_ITEM.equals(element.getTagName())
                && (isClosed || attributeCount > 0)) {
            mOut.append(' ');
        }

        if (isClosed) {
            mOut.append('/');
        }

        mOut.append('>');

        if (newlineAfterElementOpen(element, depth, isClosed)) {
            mOut.append(mLineSeparator);
        }
    }

    /**
     * Writes the closing tag for the given element. Empty elements produce no
     * output here, since their opening tag was already written as self-closing.
     *
     * @param depth the indentation depth of the element
     * @param node the element node to close; must be an {@link Element}
     */
    private void printCloseElementTag(int depth, Node node) {
        Element element = (Element) node;
        boolean alreadyClosed = isEmptyTag(element);
        if (alreadyClosed) {
            // Self-closing tag: nothing left to emit
            return;
        }

        // The closing tag goes on its own line, except in the compact resource
        // file format. If the element had element children, the end tag is
        // separated from them.
        if (newlineBeforeElementClose(element, depth)) {
            mOut.append(mLineSeparator);
        }
        if (indentBeforeElementClose(element, depth)) {
            indent(depth);
        }

        String tagName = node.getNodeName();
        mOut.append('<').append('/').append(tagName).append('>');

        if (newlineAfterElementClose(element, depth)) {
            mOut.append(mLineSeparator);
        }
    }

    private boolean newlineBeforeElementOpen(Element element, int depth) {
        if (hasBlankLineAbove()) {
            return false;
        }

        if (mPrefs.removeEmptyLines || depth <= 0) {
            return false;
        }

        if (isMarkupElement(element)) {
            return false;
        }

        // See if this element should be separated from the previous element.
        // This is the case if we are not compressing whitespace (checked above),
        // or if we are not immediately following a comment (in which case the
        // newline would have been added above it), or if we are not in a formatting
        // style where
        if (mStyle == XmlFormatStyle.LAYOUT) {
            // In layouts we always separate elements
            return true;
        }

        if (mStyle == XmlFormatStyle.MANIFEST || mStyle == XmlFormatStyle.RESOURCE
                || mStyle == XmlFormatStyle.FILE) {
            Node curr = element.getPreviousSibling();

            //