All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.simplity.json.XML Maven / Gradle / Ivy

The newest version!
package org.simplity.json;

/*
Copyright (c) 2015 JSON.org

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

The Software shall be used for Good, not Evil.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

import java.util.Iterator;

/**
 * This provides static methods to convert an XML text into a JSONObject, and to
 * covert a JSONObject into an XML text.
 * 
 * @author JSON.org
 * @version 2016-08-10
 */
@SuppressWarnings("boxing")
public class XML {
    /** The Character '&'. */
    public static final Character AMP = '&';

    /** The Character '''. */
    public static final Character APOS = '\'';

    /** The Character '!'. */
    public static final Character BANG = '!';

    /** The Character '='. */
    public static final Character EQ = '=';

    /** The Character '>'. */
    public static final Character GT = '>';

    /** The Character '<'. */
    public static final Character LT = '<';

    /** The Character '?'. */
    public static final Character QUEST = '?';

    /** The Character '"'. */
    public static final Character QUOT = '"';

    /** The Character '/'. */
    public static final Character SLASH = '/';
    
    /**
     * Creates an iterator for navigating Code Points in a string instead of
     * characters. Once Java7 support is dropped, this can be replaced with
     * 
     * string.codePoints()
     * 
     * which is available in Java8 and above.
     * 
     * @see http://stackoverflow.com/a/21791059/6030888
     */
    private static Iterable codePointIterator(final String string) {
        return new Iterable() {
            @Override
            public Iterator iterator() {
                return new Iterator() {
                    private int nextIndex = 0;
                    private int length = string.length();

                    @Override
                    public boolean hasNext() {
                        return this.nextIndex < this.length;
                    }

                    @Override
                    public Integer next() {
                        int result = string.codePointAt(this.nextIndex);
                        this.nextIndex += Character.charCount(result);
                        return result;
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException();
                    }
                };
            }
        };
    }

    /**
     * Replace special characters with XML escapes:
     * 
     * 
     * & (ampersand) is replaced by &amp;
     * < (less than) is replaced by &lt;
     * > (greater than) is replaced by &gt;
     * " (double quote) is replaced by &quot;
     * ' (single quote / apostrophe) is replaced by &apos;
     * 
* * @param string * The string to be escaped. * @return The escaped string. */ public static String escape(String string) { StringBuilder sb = new StringBuilder(string.length()); for (final int cp : codePointIterator(string)) { switch (cp) { case '&': sb.append("&"); break; case '<': sb.append("<"); break; case '>': sb.append(">"); break; case '"': sb.append("""); break; case '\'': sb.append("'"); break; default: if (mustEscape(cp)) { sb.append("&#x"); sb.append(Integer.toHexString(cp)); sb.append(";"); } else { sb.appendCodePoint(cp); } } } return sb.toString(); } /** * @param cp code point to test * @return true if the code point is not valid for an XML */ private static boolean mustEscape(int cp) { /* Valid range from https://www.w3.org/TR/REC-xml/#charsets * * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] * * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ // isISOControl is true when (cp >= 0 && cp <= 0x1F) || (cp >= 0x7F && cp <= 0x9F) // all ISO control characters are out of range except tabs and new lines return (Character.isISOControl(cp) && cp != 0x9 && cp != 0xA && cp != 0xD ) || !( // valid the range of acceptable characters that aren't control (cp >= 0x20 && cp <= 0xD7FF) || (cp >= 0xE000 && cp <= 0xFFFD) || (cp >= 0x10000 && cp <= 0x10FFFF) ) ; } /** * Removes XML escapes from the string. * * @param string * string to remove escapes from * @return string with converted entities */ public static String unescape(String string) { StringBuilder sb = new StringBuilder(string.length()); for (int i = 0, length = string.length(); i < length; i++) { char c = string.charAt(i); if (c == '&') { final int semic = string.indexOf(';', i); if (semic > i) { final String entity = string.substring(i + 1, semic); if (entity.charAt(0) == '#') { int cp; if (entity.charAt(1) == 'x') { // hex encoded unicode cp = Integer.parseInt(entity.substring(2), 16); } else { // decimal encoded unicode cp = Integer.parseInt(entity.substring(1)); } sb.appendCodePoint(cp); } else { if ("quot".equalsIgnoreCase(entity)) { sb.append('"'); } else if ("amp".equalsIgnoreCase(entity)) { sb.append('&'); } else if ("apos".equalsIgnoreCase(entity)) { sb.append('\''); } else if ("lt".equalsIgnoreCase(entity)) { sb.append('<'); } else if ("gt".equalsIgnoreCase(entity)) { sb.append('>'); } else { sb.append('&').append(entity).append(';'); } } // skip past the entity we just parsed. i += entity.length() + 1; } else { // this shouldn't happen in most cases since the parser // errors on unclosed entities. sb.append(c); } } else { // not part of an entity sb.append(c); } } return sb.toString(); } /** * Throw an exception if the string contains whitespace. Whitespace is not * allowed in tagNames and attributes. * * @param string * A string. * @throws JSONException Thrown if the string contains whitespace or is empty. */ public static void noSpace(String string) throws JSONException { int i, length = string.length(); if (length == 0) { throw new JSONException("Empty string."); } for (i = 0; i < length; i += 1) { if (Character.isWhitespace(string.charAt(i))) { throw new JSONException("'" + string + "' contains a space character."); } } } /** * Scan the content following the named tag, attaching it to the context. * * @param x * The XMLTokener containing the source string. * @param context * The JSONObject that will include the new material. * @param name * The tag name. * @return true if the close tag is processed. * @throws JSONException */ private static boolean parse(XMLTokener x, JSONObject context, String name, boolean keepStrings) throws JSONException { char c; int i; JSONObject jsonobject = null; String string; String tagName; Object token; // Test for and skip past these forms: // // // // // Report errors for these forms: // <> // <= // << token = x.nextToken(); // "); return false; } x.back(); } else if (c == '[') { token = x.nextToken(); if ("CDATA".equals(token)) { if (x.next() == '[') { string = x.nextCDATA(); if (string.length() > 0) { context.accumulate("content", string); } return false; } } throw x.syntaxError("Expected 'CDATA['"); } i = 1; do { token = x.nextMeta(); if (token == null) { throw x.syntaxError("Missing '>' after ' 0); return false; } else if (token == QUEST) { // "); return false; } else if (token == SLASH) { // Close tag if (x.nextToken() != GT) { throw x.syntaxError("Misshaped tag"); } if (jsonobject.length() > 0) { context.accumulate(tagName, jsonobject); } else { context.accumulate(tagName, ""); } return false; } else if (token == GT) { // Content, between <...> and for (;;) { token = x.nextContent(); if (token == null) { if (tagName != null) { throw x.syntaxError("Unclosed tag " + tagName); } return false; } else if (token instanceof String) { string = (String) token; if (string.length() > 0) { jsonobject.accumulate("content", keepStrings ? unescape(string) : stringToValue(string)); } } else if (token == LT) { // Nested element if (parse(x, jsonobject, tagName,keepStrings)) { if (jsonobject.length() == 0) { context.accumulate(tagName, ""); } else if (jsonobject.length() == 1 && jsonobject.opt("content") != null) { context.accumulate(tagName, jsonobject.opt("content")); } else { context.accumulate(tagName, jsonobject); } return false; } } } } else { throw x.syntaxError("Misshaped tag"); } } } } /** * This method is the same as {@link JSONObject.stringToValue(String)} * except that this also tries to unescape String values. * * @param string String to convert * @return JSON value of this string or the string */ public static Object stringToValue(String string) { Object ret = JSONObject.stringToValue(string); if(ret instanceof String){ return unescape((String)ret); } return ret; } /** * Convert a well-formed (but not necessarily valid) XML string into a * JSONObject. Some information may be lost in this transformation because * JSON is a data format and XML is a document format. XML uses elements, * attributes, and content text, while JSON uses unordered collections of * name/value pairs and arrays of values. JSON does not does not like to * distinguish between elements and attributes. Sequences of similar * elements are represented as JSONArrays. Content text may be placed in a * "content" member. Comments, prologs, DTDs, and <[ [ ]]> * are ignored. * * @param string * The source string. * @return A JSONObject containing the structured data from the XML string. * @throws JSONException Thrown if there is an errors while parsing the string */ public static JSONObject toJSONObject(String string) throws JSONException { return toJSONObject(string, false); } /** * Convert a well-formed (but not necessarily valid) XML string into a * JSONObject. Some information may be lost in this transformation because * JSON is a data format and XML is a document format. XML uses elements, * attributes, and content text, while JSON uses unordered collections of * name/value pairs and arrays of values. JSON does not does not like to * distinguish between elements and attributes. Sequences of similar * elements are represented as JSONArrays. Content text may be placed in a * "content" member. Comments, prologs, DTDs, and <[ [ ]]> * are ignored. * * All values are converted as strings, for 1, 01, 29.0 will not be coerced to * numbers but will instead be the exact value as seen in the XML document. * * @param string * The source string. * @param keepStrings If true, then values will not be coerced into boolean * or numeric values and will instead be left as strings * @return A JSONObject containing the structured data from the XML string. * @throws JSONException Thrown if there is an errors while parsing the string */ public static JSONObject toJSONObject(String string, boolean keepStrings) throws JSONException { JSONObject jo = new JSONObject(); XMLTokener x = new XMLTokener(string); while (x.more() && x.skipPast("<")) { parse(x, jo, null, keepStrings); } return jo; } /** * Convert a JSONObject into a well-formed, element-normal XML string. * * @param object * A JSONObject. * @return A string. * @throws JSONException Thrown if there is an error parsing the string */ public static String toString(Object object) throws JSONException { return toString(object, null); } /** * Convert a JSONObject into a well-formed, element-normal XML string. * * @param object * A JSONObject. * @param tagName * The optional name of the enclosing tag. * @return A string. * @throws JSONException Thrown if there is an error parsing the string */ public static String toString(Object object, String tagName) throws JSONException { StringBuilder sb = new StringBuilder(); JSONArray ja; JSONObject jo; String key; Iterator keys; String string; Object value; if (object instanceof JSONObject) { // Emit if (tagName != null) { sb.append('<'); sb.append(tagName); sb.append('>'); } // Loop thru the keys. jo = (JSONObject) object; keys = jo.keys(); while (keys.hasNext()) { key = keys.next(); value = jo.opt(key); if (value == null) { value = ""; } else if (value.getClass().isArray()) { value = new JSONArray(value); } string = value instanceof String ? (String) value : null; // Emit content in body if ("content".equals(key)) { if (value instanceof JSONArray) { ja = (JSONArray) value; int i = 0; for (Object val : ja) { if (i > 0) { sb.append('\n'); } sb.append(escape(val.toString())); i++; } } else { sb.append(escape(value.toString())); } // Emit an array of similar keys } else if (value instanceof JSONArray) { ja = (JSONArray) value; for (Object val : ja) { if (val instanceof JSONArray) { sb.append('<'); sb.append(key); sb.append('>'); sb.append(toString(val)); sb.append("'); } else { sb.append(toString(val, key)); } } } else if ("".equals(value)) { sb.append('<'); sb.append(key); sb.append("/>"); // Emit a new tag } else { sb.append(toString(value, key)); } } if (tagName != null) { // Emit the close tag sb.append("'); } return sb.toString(); } if (object != null) { if (object.getClass().isArray()) { object = new JSONArray(object); } if (object instanceof JSONArray) { ja = (JSONArray) object; for (Object val : ja) { // XML does not have good support for arrays. If an array // appears in a place where XML is lacking, synthesize an // element. sb.append(toString(val, tagName == null ? "array" : tagName)); } return sb.toString(); } } string = (object == null) ? "null" : escape(object.toString()); return (tagName == null) ? "\"" + string + "\"" : (string.length() == 0) ? "<" + tagName + "/>" : "<" + tagName + ">" + string + ""; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy