// Source listing: com.android.utils.XmlUtils (Maven / Gradle / Ivy)
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.utils;
import static com.android.SdkConstants.AMP_ENTITY;
import static com.android.SdkConstants.ANDROID_NS_NAME;
import static com.android.SdkConstants.ANDROID_URI;
import static com.android.SdkConstants.APOS_ENTITY;
import static com.android.SdkConstants.APP_PREFIX;
import static com.android.SdkConstants.GT_ENTITY;
import static com.android.SdkConstants.LT_ENTITY;
import static com.android.SdkConstants.QUOT_ENTITY;
import static com.android.SdkConstants.XMLNS;
import static com.android.SdkConstants.XMLNS_PREFIX;
import static com.android.SdkConstants.XMLNS_URI;
import static com.google.common.base.Charsets.UTF_16BE;
import static com.google.common.base.Charsets.UTF_16LE;
import static com.google.common.base.Charsets.UTF_8;
import com.android.SdkConstants;
import com.android.annotations.NonNull;
import com.android.annotations.Nullable;
import com.android.ide.common.blame.SourceFile;
import com.android.ide.common.blame.SourceFilePosition;
import com.android.ide.common.blame.SourcePosition;
import com.google.common.base.CharMatcher;
import com.google.common.io.Files;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
/** XML Utilities */
public class XmlUtils {
/** Opening delimiter of an XML comment. */
public static final String XML_COMMENT_BEGIN = "<!--"; //$NON-NLS-1$
/** Closing delimiter of an XML comment. */
public static final String XML_COMMENT_END = "-->"; //$NON-NLS-1$
/** XML declaration, followed by a newline, written at the start of a serialized document. */
public static final String XML_PROLOG =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; //$NON-NLS-1$
/**
 * Separator for xml namespace and localname
 */
public static final char NS_SEPARATOR = ':'; //$NON-NLS-1$
/** DOM user-data key under which a node's originating source file is stored. */
private static final String SOURCE_FILE_USER_DATA_KEY = "sourcefile";
/**
 * Looks up the prefix bound to the given namespace URI, declaring the namespace on the
 * document if no binding exists yet.
 * <p>
 * When nothing is declared, the fallback prefix is "android" for
 * {@link SdkConstants#ANDROID_URI} and "app" for every other URI. Use
 * {@link #lookupNamespacePrefix(Node, String, boolean)} to control whether a missing
 * declaration is created.
 *
 * @param node The current node. Must not be null.
 * @param nsUri The namespace URI whose prefix is wanted,
 *              e.g. {@link SdkConstants#ANDROID_URI}
 * @return The first prefix declared for the URI, or the fallback prefix described above
 */
@NonNull
public static String lookupNamespacePrefix(@NonNull Node node, @NonNull String nsUri) {
    // Same default-prefix selection as the three-argument overload, always creating.
    return lookupNamespacePrefix(node, nsUri, true /*create*/);
}
/**
 * Looks up the prefix bound to the given namespace URI. When nothing is declared, the
 * fallback prefix is "android" for the Android URI and "app" for every other URI.
 *
 * @param node The current node. Must not be null.
 * @param nsUri The namespace URI whose prefix is wanted, e.g.
 *              {@link SdkConstants#ANDROID_URI}
 * @param create whether a missing namespace declaration should be added to the document
 * @return The first prefix declared for the URI, or the fallback prefix described above
 */
@NonNull
public static String lookupNamespacePrefix(@NonNull Node node, @NonNull String nsUri,
        boolean create) {
    String fallbackPrefix;
    if (ANDROID_URI.equals(nsUri)) {
        fallbackPrefix = ANDROID_NS_NAME;
    } else {
        fallbackPrefix = APP_PREFIX;
    }
    return lookupNamespacePrefix(node, nsUri, fallbackPrefix, create);
}
/**
 * Returns the namespace prefix matching the requested namespace URI. If no such
 * declaration is found, a prefix derived from {@code defaultPrefix} is returned instead.
 *
 * @param node The current node. May be null, in which case no declarations can be found.
 * @param nsUri The namespace URI of which the prefix is to be found, e.g.
 *              {@link SdkConstants#ANDROID_URI}. A null URI denotes an attribute with no
 *              namespace and yields null.
 * @param defaultPrefix The default prefix (root) to use if the namespace is
 *              not found. If null, do not create a new namespace if this URI
 *              is not defined for the document.
 * @param create whether the namespace declaration should be created, if
 *              necessary
 * @return The first prefix declared, or the provided prefix (possibly with a
 *         number appended to avoid conflicts with existing prefixes). Returns null when
 *         {@code nsUri} is null, or when no declaration exists and
 *         {@code defaultPrefix} is null.
 */
public static String lookupNamespacePrefix(
        @Nullable Node node, @Nullable String nsUri, @Nullable String defaultPrefix,
        boolean create) {
    // Note: Node.lookupPrefix is not implemented in wst/xml/core NodeImpl.java
    // The following code emulates this simple call:
    //   String prefix = node.lookupPrefix(NS_RESOURCES);

    // If the requested URI is null, it denotes an attribute with no namespace.
    if (nsUri == null) {
        return null;
    }

    // Per XML specification, the "xmlns" URI is reserved.
    if (XMLNS_URI.equals(nsUri)) {
        return XMLNS;
    }

    // Prefixes already declared on the node or its ancestors; consulted below when a
    // new, non-conflicting prefix has to be invented.
    HashSet<String> visited = new HashSet<>();
    Document doc = node == null ? null : node.getOwnerDocument();

    // Ask the document about it. This method may not be implemented by the Document,
    // so any failure is deliberately ignored and the manual lookup below takes over.
    if (doc != null) {
        try {
            String declared = doc.lookupPrefix(nsUri);
            if (declared != null) {
                return declared;
            }
        } catch (Throwable ignored) {
            // fall through to the manual lookup
        }
    }

    // If that failed, try to look it up manually.
    // This also gathers prefixes in use in case we want to generate a new one below.
    for (; node != null && node.getNodeType() == Node.ELEMENT_NODE;
            node = node.getParentNode()) {
        NamedNodeMap attrs = node.getAttributes();
        for (int n = attrs.getLength() - 1; n >= 0; --n) {
            Node attr = attrs.item(n);
            if (XMLNS.equals(attr.getPrefix())) {
                String uri = attr.getNodeValue();
                String declaredPrefix = attr.getLocalName();
                // Is this the URI we are looking for? If yes, we found its prefix.
                if (nsUri.equals(uri)) {
                    return declaredPrefix;
                }
                visited.add(declaredPrefix);
            }
        }
    }

    // Failed to find a prefix. Generate a new sensible default prefix, unless
    // defaultPrefix was null in which case the caller does not want the document
    // modified.
    if (defaultPrefix == null) {
        return null;
    }

    // We need to make sure the prefix is not one that was declared in the scope
    // visited above. Pick a unique prefix from the provided default prefix.
    String prefix = defaultPrefix;
    for (int i = 1; visited.contains(prefix); i++) {
        prefix = defaultPrefix + i;
    }

    // Also create and define this prefix/URI in the XML document as an attribute in the
    // first element of the document.
    if (create && doc != null) {
        Node root = doc.getFirstChild();
        while (root != null && root.getNodeType() != Node.ELEMENT_NODE) {
            root = root.getNextSibling();
        }
        if (root != null) {
            // This doesn't work:
            //   Attr attr = doc.createAttributeNS(XMLNS_URI, prefix);
            //   attr.setPrefix(XMLNS);
            //
            // Xerces throws
            //   org.w3c.dom.DOMException: NAMESPACE_ERR: An attempt is made to create or
            //   change an object in a way which is incorrect with regard to namespaces.
            //
            // Instead pass in the concatenated prefix. (This is covered by
            // the UiElementNodeTest#testCreateNameSpace() test.)
            Attr attr = doc.createAttributeNS(XMLNS_URI, XMLNS_PREFIX + prefix);
            attr.setValue(nsUri);
            root.getAttributes().setNamedItemNS(attr);
        }
    }

    return prefix;
}
/**
 * Escapes a string for use as an XML attribute value: double quotes, single quotes,
 * {@code <} and {@code &} are replaced with their XML entities. A value containing none
 * of these characters is returned as-is, without copying.
 *
 * @param attrValue the value to be escaped
 * @return the escaped value
 */
@NonNull
public static String toXmlAttributeValue(@NonNull String attrValue) {
    final int length = attrValue.length();
    for (int index = 0; index < length; index++) {
        switch (attrValue.charAt(index)) {
            case '"':
            case '\'':
            case '<':
            case '&': {
                // At least one character needs escaping: build the escaped copy.
                StringBuilder escaped = new StringBuilder(2 * length);
                appendXmlAttributeValue(escaped, attrValue);
                return escaped.toString();
            }
            default:
                break;
        }
    }
    return attrValue;
}
/**
 * Converts the given XML-attribute-safe value to a java string by replacing the
 * quote, apostrophe, less-than, greater-than and ampersand entities with the
 * characters they stand for.
 *
 * @param escapedAttrValue the escaped value
 * @return the unescaped value
 */
@NonNull
public static String fromXmlAttributeValue(@NonNull String escapedAttrValue) {
    String workingString = escapedAttrValue.replace(QUOT_ENTITY, "\"");
    workingString = workingString.replace(LT_ENTITY, "<");
    workingString = workingString.replace(APOS_ENTITY, "'");
    workingString = workingString.replace(GT_ENTITY, ">");
    // The ampersand must be decoded last: decoding it earlier would turn an escaped
    // ampersand followed by entity-like text into a second entity and unescape it twice.
    workingString = workingString.replace(AMP_ENTITY, "&");
    return workingString;
}
/**
 * Escapes a string for use as XML text content: less-than and ampersand characters are
 * replaced with their entities. Text containing neither character is returned as-is,
 * without copying.
 *
 * @param textValue the text value to be escaped
 * @return the escaped value
 */
@NonNull
public static String toXmlTextValue(@NonNull String textValue) {
    boolean needsEscaping = textValue.indexOf('<') >= 0 || textValue.indexOf('&') >= 0;
    if (!needsEscaping) {
        return textValue;
    }
    StringBuilder escaped = new StringBuilder(2 * textValue.length());
    appendXmlTextValue(escaped, textValue);
    return escaped.toString();
}
/**
 * Appends an attribute value to the given {@link StringBuilder}, escaping the
 * characters that need it inside a DOM attribute node.
 *
 * @param sb the string builder
 * @param attrValue the attribute value to be appended and escaped
 */
public static void appendXmlAttributeValue(@NonNull StringBuilder sb,
        @NonNull String attrValue) {
    // &, ", ' and < are illegal in attributes; see http://www.w3.org/TR/REC-xml/#NT-AttValue
    // (' is legal in a " string and " is legal in a ' string, but both are escaped here
    // to stay on the safe side)
    final int length = attrValue.length();
    for (int index = 0; index < length; index++) {
        char ch = attrValue.charAt(index);
        switch (ch) {
            case '"':
                sb.append(QUOT_ENTITY);
                break;
            case '<':
                sb.append(LT_ENTITY);
                break;
            case '\'':
                sb.append(APOS_ENTITY);
                break;
            case '&':
                sb.append(AMP_ENTITY);
                break;
            default:
                sb.append(ch);
                break;
        }
    }
}
/**
 * Appends the whole of {@code textValue} to the given {@link StringBuilder}, escaping
 * it as required for a DOM text node.
 *
 * @param sb the string builder
 * @param textValue the text value to be appended and escaped
 */
public static void appendXmlTextValue(@NonNull StringBuilder sb, @NonNull String textValue) {
    final int length = textValue.length();
    appendXmlTextValue(sb, textValue, 0, length);
}
/**
 * Appends a range of {@code textValue} to the given {@link StringBuilder}, escaping
 * {@code <} and {@code &} as required for a DOM text node.
 *
 * @param sb the string builder
 * @param textValue the text value to be appended and escaped
 * @param start the starting offset in the text string (inclusive)
 * @param end the ending offset in the text string (exclusive); values beyond the length
 *            of the string are clamped to its length
 */
public static void appendXmlTextValue(@NonNull StringBuilder sb, @NonNull String textValue, int start, int end) {
    final int limit = Math.min(textValue.length(), end);
    int index = start;
    while (index < limit) {
        char ch = textValue.charAt(index);
        switch (ch) {
            case '<':
                sb.append(LT_ENTITY);
                break;
            case '&':
                sb.append(AMP_ENTITY);
                break;
            default:
                sb.append(ch);
                break;
        }
        index++;
    }
}
/**
 * Checks whether any direct child of the given node is an element.
 *
 * @param node the node to test for element children
 * @return true if the node has one or more element children
 */
public static boolean hasElementChildren(@NonNull Node node) {
    NodeList childNodes = node.getChildNodes();
    final int count = childNodes.getLength();
    int index = 0;
    while (index < count) {
        Node child = childNodes.item(index);
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            return true;
        }
        index++;
    }
    return false;
}
/**
 * Returns a character reader for the given file, which must be a UTF encoded file.
 * A leading byte order mark selects the decoder (UTF-8, UTF-16 BE/LE or UTF-32 BE/LE)
 * and is skipped; without one the content is decoded as UTF-8.
 *
 * The reader does not need to be closed by the caller (because the file is read in
 * full in one shot and the resulting array is then wrapped in a byte array input stream,
 * which does not need to be closed.)
 */
public static Reader getUtfReader(@NonNull File file) throws IOException {
    byte[] content = Files.toByteArray(file);
    final int size = content.length;
    if (size == 0) {
        return new StringReader("");
    }

    // UTF-8 BOM: EF BB BF: Skip it
    if (hasBytePrefix(content, 0xEF, 0xBB, 0xBF)) {
        return new InputStreamReader(new ByteArrayInputStream(content, 3, size - 3), UTF_8);
    }

    // UTF-16 Big Endian BOM: FE FF
    if (hasBytePrefix(content, 0xFE, 0xFF)) {
        return new InputStreamReader(new ByteArrayInputStream(content, 2, size - 2),
                UTF_16BE);
    }

    if (hasBytePrefix(content, 0xFF, 0xFE)) {
        // UTF-32 Little Endian BOM: FF FE 00 00 (checked first: it extends the UTF-16 LE one)
        if (hasBytePrefix(content, 0xFF, 0xFE, 0x00, 0x00)) {
            return new InputStreamReader(new ByteArrayInputStream(content, 4, size - 4),
                    "UTF-32LE");
        }
        // UTF-16 Little Endian BOM: FF FE
        return new InputStreamReader(new ByteArrayInputStream(content, 2, size - 2),
                UTF_16LE);
    }

    // UTF-32 Big Endian BOM: 00 00 FE FF
    if (hasBytePrefix(content, 0x00, 0x00, 0xFE, 0xFF)) {
        return new InputStreamReader(new ByteArrayInputStream(content, 4, size - 4),
                "UTF-32BE");
    }

    // No byte order mark: Assume UTF-8 (where the BOM is optional).
    return new InputStreamReader(new ByteArrayInputStream(content), UTF_8);
}

/** Returns true if {@code data} begins with the given byte values (each given as 0-255). */
private static boolean hasBytePrefix(byte[] data, int... prefix) {
    if (data.length < prefix.length) {
        return false;
    }
    for (int i = 0; i < prefix.length; i++) {
        if (data[i] != (byte) prefix[i]) {
            return false;
        }
    }
    return true;
}
/**
 * Parses an XML string into a DOM document with the JDK parser, after removing a
 * leading byte order mark if there is one. The parser does not validate, and is
 * optionally namespace aware.
 *
 * @param xml the XML content to be parsed (must be well formed)
 * @param namespaceAware whether the parser is namespace aware
 * @return the DOM document
 */
@NonNull
public static Document parseDocument(@NonNull String xml, boolean namespaceAware)
        throws ParserConfigurationException, IOException, SAXException {
    String withoutBom = stripBom(xml);
    Reader reader = new StringReader(withoutBom);
    return parseDocument(reader, namespaceAware);
}
/**
 * Parses the content supplied by a {@link Reader} into a DOM document with the JDK
 * parser. The parser does not validate, and is optionally namespace aware.
 *
 * @param xml a reader for the XML content to be parsed (must be well formed)
 * @param namespaceAware whether the parser is namespace aware
 * @return the DOM document
 */
@NonNull
public static Document parseDocument(@NonNull Reader xml, boolean namespaceAware)
        throws ParserConfigurationException, IOException, SAXException {
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setValidating(false);
    builderFactory.setNamespaceAware(namespaceAware);
    // NOTE(review): DTD/external-entity processing is left at the factory defaults;
    // confirm that is acceptable for every source of XML passed in here.
    DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder();
    return documentBuilder.parse(new InputSource(xml));
}
/**
 * Parses the given UTF file as a DOM document, using the JDK parser. The parser does not
 * validate, and is optionally namespace aware. The file's byte order mark, if any, is
 * honoured via {@link #getUtfReader(File)}.
 *
 * @param file the UTF encoded file to parse
 * @param namespaceAware whether the parser is namespace aware
 * @return the DOM document
 * @throws ParserConfigurationException if a parser cannot be created
 * @throws IOException if the file cannot be read
 * @throws SAXException if the content cannot be parsed
 */
@NonNull
public static Document parseUtfXmlFile(@NonNull File file, boolean namespaceAware)
        throws ParserConfigurationException, IOException, SAXException {
    // try-with-resources keeps a failure in close() from masking a parse failure, and
    // delegating keeps the parser configuration in a single place.
    try (Reader reader = getUtfReader(file)) {
        return parseDocument(reader, namespaceAware);
    }
}
/**
 * Removes a leading byte order mark character (U+FEFF) from the given string.
 *
 * @param xml the text to inspect
 * @return the text without its leading BOM, or the same string if it has none
 */
@NonNull
public static String stripBom(@NonNull String xml) {
    return xml.startsWith("\uFEFF") ? xml.substring(1) : xml;
}
/**
 * Best-effort variant of {@link #parseDocument(String, boolean)}: parses the given XML
 * string with the JDK parser (non-validating, optionally namespace aware) and swallows
 * any exception raised along the way.
 *
 * @param xml the XML content to be parsed (must be well formed)
 * @param namespaceAware whether the parser is namespace aware
 * @return the DOM document, or null if parsing failed
 */
@Nullable
public static Document parseDocumentSilently(@NonNull String xml, boolean namespaceAware) {
    Document document = null;
    try {
        document = parseDocument(xml, namespaceAware);
    } catch (Exception ignored) {
        // This method is deliberately silent; the result stays null.
    }
    return document;
}
/**
 * Serializes an XML tree to a string, without recording any source positions. No pretty
 * printing is performed; for that, use {@code XmlPrettyPrinter.prettyPrint(node)} in
 * {@code sdk-common}.
 *
 * @param node the root of the tree to serialize
 * @return the XML text for the tree
 */
public static String toXml(@NonNull Node node) {
    // A null blame map tells the serializer not to collect position information.
    return toXml(node, null);
}
/**
 * Dumps an XML tree to a string without pretty printing, optionally recording where each
 * element ended up in the output.
 *
 * @param node the root of the tree to serialize
 * @param blame if non-null, receives for each element with a known source position a
 *              mapping from the element's position in the returned text to its position
 *              in the source file
 * @return the XML text for the tree
 */
public static String toXml(
        @NonNull Node node,
        @Nullable Map<SourcePosition, SourceFilePosition> blame) {
    PositionAwareStringBuilder sb = new PositionAwareStringBuilder(1000);
    append(sb, node, blame);
    return sb.toString();
}
/**
 * Dumps a node, and recursively its children, to the builder without indentation
 * adjustments.
 *
 * @param sb the position-tracking builder receiving the XML text
 * @param node the node to serialize
 * @param blame if non-null, receives for each element with a known source position a
 *              mapping from the element's span in the output to its source position
 * @throws UnsupportedOperationException for node types other than document, document
 *         fragment, comment, text, CDATA section and element
 */
private static void append(
        @NonNull PositionAwareStringBuilder sb,
        @NonNull Node node,
        @Nullable Map<SourcePosition, SourceFilePosition> blame) {
    short nodeType = node.getNodeType();
    // Remember where this node starts in the output; used for the blame entry below.
    int currentLine = sb.line;
    int currentColumn = sb.column;
    int currentOffset = sb.getOffset();
    switch (nodeType) {
        case Node.DOCUMENT_NODE:
        case Node.DOCUMENT_FRAGMENT_NODE: {
            sb.append(XML_PROLOG);
            NodeList children = node.getChildNodes();
            for (int i = 0, n = children.getLength(); i < n; i++) {
                append(sb, children.item(i), blame);
            }
            break;
        }
        case Node.COMMENT_NODE:
            sb.append(XML_COMMENT_BEGIN);
            sb.append(node.getNodeValue());
            sb.append(XML_COMMENT_END);
            break;
        case Node.TEXT_NODE: {
            sb.append(toXmlTextValue(node.getNodeValue()));
            break;
        }
        case Node.CDATA_SECTION_NODE: {
            // CDATA content is written verbatim between its markers; it is not escaped.
            sb.append("<![CDATA["); //$NON-NLS-1$
            sb.append(node.getNodeValue());
            sb.append("]]>"); //$NON-NLS-1$
            break;
        }
        case Node.ELEMENT_NODE: {
            sb.append('<');
            Element element = (Element) node;
            sb.append(element.getTagName());

            NamedNodeMap attributes = element.getAttributes();
            for (int i = 0, n = attributes.getLength(); i < n; i++) {
                Node attribute = attributes.item(i);
                sb.append(' ');
                sb.append(attribute.getNodeName());
                sb.append('=').append('"');
                sb.append(toXmlAttributeValue(attribute.getNodeValue()));
                sb.append('"');
            }

            NodeList children = element.getChildNodes();
            int childCount = children.getLength();
            if (childCount == 0) {
                // No children: emit a self-closing tag.
                sb.append('/');
            }
            sb.append('>');
            if (childCount > 0) {
                for (int i = 0; i < childCount; i++) {
                    append(sb, children.item(i), blame);
                }
                sb.append('<').append('/');
                sb.append(element.getTagName());
                sb.append('>');
            }

            if (blame != null) {
                SourceFilePosition position = getSourceFilePosition(node);
                if (!position.equals(SourceFilePosition.UNKNOWN)) {
                    // Map the span just written for this element to where it came from.
                    blame.put(
                            new SourcePosition(
                                    currentLine, currentColumn, currentOffset,
                                    sb.line, sb.column, sb.getOffset()),
                            position);
                }
            }
            break;
        }
        default:
            throw new UnsupportedOperationException(
                    "Unsupported node type " + nodeType + ": not yet implemented");
    }
}
/**
* Wraps a StringBuilder, but keeps track of the line and column of the end of the string.
*
* It implements append(String) and append(char) which as well as delegating to the underlying
* StringBuilder also keep track of any new lines, and set the line and column fields.
* The StringBuilder itself keeps track of the actual character offset.
*/
private static class PositionAwareStringBuilder {
@SuppressWarnings("StringBufferField")
private final StringBuilder sb;
int line = 0;
int column = 0;
public PositionAwareStringBuilder(int size) {
sb = new StringBuilder(size);
}
public PositionAwareStringBuilder append(String text) {
sb.append(text);
// we find the last, as it might be useful later.
int lastNewLineIndex = text.lastIndexOf('\n');
if (lastNewLineIndex == -1) {
// If it does not contain a new line, we just increase the column number.
column += text.length();
} else {
// The string could contain multiple new lines.
line += CharMatcher.is('\n').countIn(text);
// But for column we only care about the number of characters after the last one.
column = text.length() - lastNewLineIndex - 1;
}
return this;
}
public PositionAwareStringBuilder append(char character) {
sb.append(character);
if (character == '\n') {
line += 1;
column = 0;
} else {
column++;
}
return this;
}
public int getOffset() {
return sb.length();
}
@Override
public String toString() {
return sb.toString();
}
}
/**
 * Records the given source file on a DOM node as user data, where
 * {@link #getSourceFilePosition(Node)} reads it back.
 *
 * @param node the node to tag
 * @param sourceFile the source file to associate with the node
 */
public static void attachSourceFile(Node node, SourceFile sourceFile) {
    node.setUserData(
            SOURCE_FILE_USER_DATA_KEY,
            sourceFile,
            null /* no user data handler */);
}
/**
 * Builds the source file position of a DOM node from the source file stored in its user
 * data and the position reported by {@code PositionXmlParser.getPosition(node)}.
 *
 * @param node the node to look up
 * @return the node's position, using {@code SourceFile.UNKNOWN} as the file when none
 *         has been attached to the node
 */
public static SourceFilePosition getSourceFilePosition(Node node) {
    Object attached = node.getUserData(SOURCE_FILE_USER_DATA_KEY);
    SourceFile sourceFile = attached == null ? SourceFile.UNKNOWN : (SourceFile) attached;
    return new SourceFilePosition(sourceFile, PositionXmlParser.getPosition(node));
}
/**
 * Format the given floating value into an XML string, omitting decimals if
 * 0. Values with a fractional part are written with exactly two decimals and a dot as
 * the decimal separator, regardless of the default locale.
 *
 * @param value the value to be formatted
 * @return the corresponding XML string for the value
 */
public static String formatFloatAttribute(double value) {
    // Use long rather than int so whole values beyond the int range are still
    // recognised as whole. The cast saturates at the long range; Long.MAX_VALUE can only
    // be produced by saturation (it is not exactly representable as a double), so it is
    // excluded. NaN casts to 0 and fails the equality test.
    long integral = (long) value;
    if (value == integral && integral != Long.MAX_VALUE) {
        return Long.toString(integral);
    }
    // Run String.format without a locale, because we don't want locale-specific
    // conversions here like separating the decimal part with a comma instead of a dot!
    return String.format((Locale) null, "%.2f", value); //$NON-NLS-1$
}
/**
 * Returns the name of the root element tag stored in the given file, or null if it can't be
 * determined. The file is only read up to its first start tag; read and parse errors are
 * swallowed and reported as null.
 *
 * @param xmlFile the XML file to inspect
 * @return the local name of the first element in the file, or null
 */
@Nullable
public static String getRootTagName(@NonNull File xmlFile) {
    try (InputStream stream = new BufferedInputStream(new FileInputStream(xmlFile))) {
        XMLInputFactory factory = XMLInputFactory.newFactory();
        XMLStreamReader xmlStreamReader = factory.createXMLStreamReader(stream);
        try {
            while (xmlStreamReader.hasNext()) {
                int event = xmlStreamReader.next();
                if (event == XMLStreamReader.START_ELEMENT) {
                    return xmlStreamReader.getLocalName();
                }
            }
        } finally {
            // XMLStreamReader is not AutoCloseable and closing it does not close the
            // underlying stream, so release it explicitly. A failure here must not
            // replace a result that was already found.
            try {
                xmlStreamReader.close();
            } catch (XMLStreamException ignored) {
                // Nothing useful can be done about a failed close.
            }
        }
    } catch (XMLStreamException | IOException ignored) {
        // Ignored: the root tag cannot be determined, so null is returned below.
    }
    return null;
}
}