org.apache.hadoop.hdfs.util.XMLUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-apache Show documentation
Show all versions of hadoop-apache Show documentation
Shaded version of Apache Hadoop for Presto
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.util;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* General xml utilities.
*
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class XMLUtils {
  /**
   * Exception that reflects an invalid XML document.
   */
  public static class InvalidXmlException extends RuntimeException {
    private static final long serialVersionUID = 1L;

    /**
     * @param s description of what was wrong with the document
     */
    public InvalidXmlException(String s) {
      super(s);
    }
  }

  /**
   * Exception that reflects a string that cannot be unmangled.
   */
  public static class UnmanglingError extends RuntimeException {
    private static final long serialVersionUID = 1L;

    /**
     * @param str description of the failure
     * @param e   the underlying cause
     */
    public UnmanglingError(String str, Exception e) {
      super(str, e);
    }

    /**
     * @param str description of the failure
     */
    public UnmanglingError(String str) {
      super(str);
    }
  }

  /**
   * Number of hexadecimal digits that follow the backslash in a mangled
   * code point (the 'ABCD' in '\ABCD;').
   */
  private static final int NUM_SLASH_POSITIONS = 4;

  /**
   * Given a code point, determine if it should be mangled before being
   * represented in an XML document.
   *
   * Any code point that isn't valid in XML must be mangled.
   * See http://en.wikipedia.org/wiki/Valid_characters_in_XML for a
   * quick reference, or the w3 standard for the authoritative reference.
   *
   * @param cp The code point
   * @return True if the code point should be mangled
   */
  private static boolean codePointMustBeMangled(int cp) {
    if (cp < 0x20) {
      // Of the C0 controls only tab, line feed and carriage return are legal.
      return (cp != 0x9) && (cp != 0xa) && (cp != 0xd);
    }
    if ((0xd7ff < cp) && (cp < 0xe000)) {
      // Surrogate range; only reached here for unpaired surrogates.
      return true;
    }
    if ((cp == 0xfffe) || (cp == 0xffff)) {
      return true;
    }
    // We mangle backslash to simplify decoding... it's easier if
    // backslashes always begin mangled sequences.
    return cp == 0x5c;
  }

  /**
   * Render a code point in the mangled form '\ABCD;' where ABCD is the
   * zero-padded lower-case hexadecimal value of the code point.
   *
   * @param cp The code point to mangle
   * @return The mangled representation
   */
  private static String mangleCodePoint(int cp) {
    return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
  }

  /**
   * Map a code point to its predefined XML entity reference.
   *
   * @param cp The code point
   * @return The entity reference, or null if the code point has none
   */
  private static String codePointToEntityRef(int cp) {
    switch (cp) {
      case '&':
        return "&amp;";
      case '\"':
        return "&quot;";
      case '\'':
        return "&apos;";
      case '<':
        return "&lt;";
      case '>':
        return "&gt;";
      default:
        return null;
    }
  }

  /**
   * Map a complete predefined XML entity reference back to its character.
   *
   * @param ref The entity reference, including the leading '&' and the
   *            trailing ';'
   * @return The character the reference stands for
   * @throws UnmanglingError if the reference is not one of the five
   *                         predefined ones
   */
  private static char entityRefToChar(String ref) {
    if (ref.equals("&quot;")) {
      return '\"';
    } else if (ref.equals("&apos;")) {
      return '\'';
    } else if (ref.equals("&amp;")) {
      return '&';
    } else if (ref.equals("&lt;")) {
      return '<';
    } else if (ref.equals("&gt;")) {
      return '>';
    }
    throw new UnmanglingError("Unknown entity ref " + ref);
  }

  /**
   * Parse the hexadecimal digits of a mangled code point.
   *
   * @param hex The characters found between the backslash and the semicolon
   * @return The decoded code point
   * @throws UnmanglingError if the characters are not a non-negative
   *                         hexadecimal number
   */
  private static int parseEscapedCodePoint(String hex) {
    try {
      final int cp = Integer.parseInt(hex, 16);
      if (cp < 0) {
        // Integer.parseInt accepts a leading '-'; a negative value is not a
        // code point and would otherwise surface as an
        // IllegalArgumentException from StringBuilder.appendCodePoint.
        throw new UnmanglingError("error parsing unmangling escape code: " +
            "negative value " + hex);
      }
      return cp;
    } catch (NumberFormatException e) {
      throw new UnmanglingError("error parsing unmangling escape code", e);
    }
  }

  /**
   * Mangle a string so that it can be represented in an XML document.
   *
   * There are three kinds of code points in XML:
   * - Those that can be represented normally,
   * - Those that have to be escaped (for example, &amp; must be represented
   *   as &amp;amp;)
   * - Those that cannot be represented at all in XML.
   *
   * The built-in SAX functions will handle the first two types for us just
   * fine.  However, sometimes we come across a code point of the third type.
   * In this case, we have to mangle the string in order to represent it at
   * all.  We also mangle backslash to avoid confusing a backslash in the
   * string with part of our escape sequence.
   *
   * The encoding used here is as follows: an illegal code point is
   * represented as '\ABCD;', where ABCD is the hexadecimal value of
   * the code point.
   *
   * @param str The input string.
   * @param createEntityRefs If true, the five characters that have a
   *                         predefined XML entity are additionally replaced
   *                         by that entity reference.
   *
   * @return The mangled string.
   */
  public static String mangleXmlString(String str, boolean createEntityRefs) {
    final StringBuilder bld = new StringBuilder();
    final int length = str.length();
    int offset = 0;
    while (offset < length) {
      final int cp = str.codePointAt(offset);
      if (codePointMustBeMangled(cp)) {
        bld.append(mangleCodePoint(cp));
      } else {
        final String entityRef =
            createEntityRefs ? codePointToEntityRef(cp) : null;
        if (entityRef != null) {
          bld.append(entityRef);
        } else {
          bld.appendCodePoint(cp);
        }
      }
      // Advance by one or two chars depending on whether cp is supplementary.
      offset += Character.charCount(cp);
    }
    return bld.toString();
  }

  /**
   * Demangle a string from an XML document.
   * See {@link #mangleXmlString(String, boolean)} for a description of the
   * mangling format.
   *
   * @param str The string to be demangled.
   * @param decodeEntityRefs If true, the five predefined XML entity
   *                         references are decoded back to their characters;
   *                         if false, '&amp;' is copied through like any
   *                         other character.
   *
   * @return The unmangled string
   * @throws UnmanglingError if the input is malformed.
   */
  public static String unmangleXmlString(String str, boolean decodeEntityRefs)
      throws UnmanglingError {
    // -1 when outside an escape; otherwise the number of hex digits consumed
    // so far after the introducing backslash.
    int slashPosition = -1;
    final StringBuilder escapedCp = new StringBuilder(NUM_SLASH_POSITIONS);
    final StringBuilder bld = new StringBuilder();
    // Non-null while the characters of an entity reference are being
    // collected.
    StringBuilder entityRef = null;
    for (int i = 0; i < str.length(); i++) {
      final char ch = str.charAt(i);
      if (entityRef != null) {
        entityRef.append(ch);
        if (ch == ';') {
          bld.append(entityRefToChar(entityRef.toString()));
          entityRef = null;
        }
      } else if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) {
        escapedCp.append(ch);
        ++slashPosition;
      } else if (slashPosition == NUM_SLASH_POSITIONS) {
        if (ch != ';') {
          throw new UnmanglingError("unterminated code point escape: " +
              "expected semicolon at end.");
        }
        bld.appendCodePoint(parseEscapedCodePoint(escapedCp.toString()));
        escapedCp.setLength(0);
        slashPosition = -1;
      } else if (ch == '\\') {
        slashPosition = 0;
      } else if (decodeEntityRefs && (ch == '&')) {
        entityRef = new StringBuilder("&");
      } else {
        bld.append(ch);
      }
    }
    if (entityRef != null) {
      throw new UnmanglingError("unterminated entity ref starting with " +
          entityRef.toString());
    } else if (slashPosition != -1) {
      throw new UnmanglingError("unterminated code point escape: string " +
          "broke off in the middle");
    }
    return bld.toString();
  }

  /**
   * Add a SAX tag with a string inside.
   *
   * The string is mangled with {@link #mangleXmlString(String, boolean)}
   * without creating entity references before it is handed to the handler.
   *
   * @param contentHandler the SAX content handler
   * @param tag the element tag to use
   * @param val the string to put inside the tag
   * @throws SAXException if the content handler reports an error
   */
  public static void addSaxString(ContentHandler contentHandler,
      String tag, String val) throws SAXException {
    contentHandler.startElement("", "", tag, new AttributesImpl());
    final char[] c = mangleXmlString(val, false).toCharArray();
    contentHandler.characters(c, 0, c.length);
    contentHandler.endElement("", "", tag);
  }

  /**
   * Represents a bag of key-value pairs encountered during parsing an XML
   * file.
   */
  public static class Stanza {
    /** Child stanzas grouped by name, in the order they were added. */
    private final TreeMap<String, LinkedList<Stanza>> subtrees;

    /** The unmangled value of this stanza. */
    private String value;

    /**
     * Create a stanza with no children and an empty value.
     */
    public Stanza() {
      subtrees = new TreeMap<String, LinkedList<Stanza>>();
      value = "";
    }

    /**
     * @param value the new value of this stanza
     */
    public void setValue(String value) {
      this.value = value;
    }

    /**
     * @return the value of this stanza
     */
    public String getValue() {
      return this.value;
    }

    /**
     * Discover if a stanza has a given entry.
     *
     * @param name entry to look for
     *
     * @return true if the entry was found
     */
    public boolean hasChildren(String name) {
      return subtrees.containsKey(name);
    }

    /**
     * Pull an entry from a stanza.
     *
     * @param name entry to look for
     *
     * @return the entry
     * @throws InvalidXmlException if there is no entry with that name
     */
    public List<Stanza> getChildren(String name) throws InvalidXmlException {
      final LinkedList<Stanza> children = subtrees.get(name);
      if (children == null) {
        throw new InvalidXmlException("no entry found for " + name);
      }
      return children;
    }

    /**
     * Pull a string entry from a stanza.
     *
     * @param name entry to look for
     *
     * @return the entry
     * @throws InvalidXmlException if there is no entry with that name, or
     *                             more than one
     */
    public String getValue(String name) throws InvalidXmlException {
      final String ret = getValueOrNull(name);
      if (ret == null) {
        throw new InvalidXmlException("no entry found for " + name);
      }
      return ret;
    }

    /**
     * Pull a string entry from a stanza, or null.
     *
     * @param name entry to look for
     *
     * @return the entry, or null if it was not found.
     * @throws InvalidXmlException if the name does not map to exactly one
     *                             child
     */
    public String getValueOrNull(String name) throws InvalidXmlException {
      final LinkedList<Stanza> l = subtrees.get(name);
      if (l == null) {
        return null;
      }
      if (l.size() != 1) {
        throw new InvalidXmlException("More than one value found for " + name);
      }
      return l.get(0).getValue();
    }

    /**
     * Add an entry to a stanza.
     *
     * @param name name of the entry to add
     * @param child the entry to add
     */
    public void addChild(String name, Stanza child) {
      LinkedList<Stanza> l = subtrees.get(name);
      if (l == null) {
        l = new LinkedList<Stanza>();
        subtrees.put(name, l);
      }
      l.add(child);
    }

    /**
     * Convert a stanza to a human-readable string.
     *
     * The result is the quoted value (omitted when null or empty) followed
     * by every child rendered as {@code <name>child}, with children
     * separated by ", " and the whole wrapped in braces.
     */
    @Override
    public String toString() {
      final StringBuilder bld = new StringBuilder();
      bld.append("{");
      // Tolerate a null value so that toString never throws.
      if ((value != null) && !value.equals("")) {
        bld.append("\"").append(value).append("\"");
      }
      String prefix = "";
      for (Map.Entry<String, LinkedList<Stanza>> entry :
          subtrees.entrySet()) {
        final String key = entry.getKey();
        for (Stanza child : entry.getValue()) {
          bld.append(prefix);
          bld.append("<").append(key).append(">");
          // append(Object) renders a null child as "null" instead of throwing.
          bld.append(child);
          prefix = ", ";
        }
      }
      bld.append("}");
      return bld.toString();
    }
  }
}