com.x5.util.LiteXml Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of chunk-templates Show documentation
Chunk Template Engine for Java
There is a newer version: 3.6.2
package com.x5.util;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// This bare-bones class provides minimal XML parsing functionality
// and is well suited to thin clients that receive many very short messages
// formatted in XML.  The XML must be very well-formed; any extraneous spaces
// or angle brackets will cause the parser to choke.  The need to keep
// the client small outweighs the convenience that better exception handling
// would add here.

public class LiteXml
{
    private String xml;
    private Map attrs = null;
    // can handle this many child nodes in a parse for children
    // before pausing to grow the endpoints array.
    // nested nodes will survive ok, they don't add to the final count.
    private static final int MAX_PARSE = 15;

    /**
     * Wraps a snippet of XML text.  Nothing is parsed here; the text is only
     * examined when one of the accessor methods is called.
     *
     * @param xmlNode the XML text to wrap (stored as-is, may be null)
     */
    public LiteXml(String xmlNode)
    {
        xml = xmlNode;
    }

    /**
     * Returns the node type (tag name) of the first element in the wrapped XML.
     *
     * Leading {@code <? ... ?>} instructions (such as an XML declaration) are
     * skipped.  Only instructions that actually open the text are skipped, so
     * a "?>" sequence that merely appears inside the node's content is no
     * longer mistaken for the end of a header.
     *
     * Assumptions: no whitespace between '<' and the node type, the node type
     * is followed by a space or '>' (no cr/lf), and attribute names and values
     * do not contain '>'.
     *
     * @return the node type, or null if the XML is null or no tag is found
     */
    public String getNodeType()
    {
        if (xml == null) return null;

        int begPos = xml.indexOf('<');
        // step over any leading <? ... ?> instructions
        while (begPos > -1 && xml.startsWith("<?", begPos)) {
            int headerEnd = xml.indexOf("?>", begPos);
            // unterminated instruction: stop skipping and treat it like a tag
            if (headerEnd < 0) break;
            begPos = xml.indexOf('<', headerEnd + 2);
        }
        if (begPos < 0) return null;

        // the node type ends at the first space or '>', whichever comes first
        int spacePos = xml.indexOf(' ', begPos);
        int endPos = xml.indexOf('>', begPos);
        if (spacePos > -1 && spacePos < endPos) endPos = spacePos;
        // no '>' at all (endPos is -1): not a usable tag
        if (endPos < begPos + 1) return null;

        return xml.substring(begPos + 1, endPos);
    }

    /**
     * Parses and returns the attributes of the first element in the wrapped XML.
     *
     * Leading {@code <? ... ?>} instructions (such as an XML declaration) are
     * skipped first, so the pseudo-attributes of the declaration are not
     * returned in place of the node's own attributes.
     *
     * A non-null result is cached in {@code attrs} and returned directly on
     * later calls.
     *
     * @return map of attribute name to unescaped value, or null if the XML is
     *         null, has no closing '>' for the tag, or the tag has no attributes
     */
    public Map getAttributes()
    {
        if (xml == null) return null;
        if (this.attrs != null) return this.attrs;

        // locate the opening tag, stepping over any leading <? ... ?> instructions
        int tagStart = xml.indexOf('<');
        while (tagStart > -1 && xml.startsWith("<?", tagStart)) {
            int headerEnd = xml.indexOf("?>", tagStart);
            // unterminated instruction: stop skipping and treat it like a tag
            if (headerEnd < 0) break;
            tagStart = xml.indexOf('<', headerEnd + 2);
            // nothing but instructions: there is no node to read attributes from
            if (tagStart < 0) return null;
        }
        // no '<' at all: fall back to scanning from the start of the text
        int from = (tagStart < 0) ? 0 : tagStart;

        // pick a candidate for close-tag-position
        int tagEndPos = xml.indexOf('>', from);
        // not found? bail.
        if (tagEndPos < 0) return null;
        int spacePos = xml.indexOf(' ', from);
        // no attributes? bail.
        if (spacePos < 0 || spacePos > tagEndPos) return null;

        // narrow the parsing space to just this tag's attributes
        String attrDefs = xml.substring(spacePos + 1, tagEndPos);
        this.attrs = parseAttributes(attrDefs);
        return this.attrs;
    }

    /**
     * Splits an attribute string of the form {@code name="value" name2="value2"}
     * into name/value pairs.
     *
     * Values must be wrapped in double quotes.  Inside a value, a backslash
     * escapes a double quote or another backslash; those escapes are removed
     * and then XML entities are unescaped.  Parsing stops quietly at the first
     * attribute that is not properly delimited.
     *
     * @param attrDef the text between the node type and the closing '>' of a tag
     * @return map of trimmed attribute name to unescaped value (possibly empty)
     */
    private Map parseAttributes(String attrDef)
    {
        Map parsed = new HashMap();
        final int length = attrDef.length();
        int pos = 0;

        while (pos < length) {
            // the attribute name runs up to the next '='
            int equalsPos = attrDef.indexOf('=', pos);
            if (equalsPos < 0) break;
            String name = attrDef.substring(pos, equalsPos);

            // the value starts just past the first double-quote after '='
            int quoteStart = attrDef.indexOf('"', equalsPos + 1);
            if (quoteStart < 0) break;
            int valueStart = quoteStart + 1;

            // ...and ends at the next double-quote that is not backslash-escaped
            int quoteEnd = nextUnescapedDelim("\"", attrDef, valueStart);
            if (quoteEnd < 0) break;

            // strip the backslash escapes: \" becomes " and then \\ becomes \
            String value = attrDef.substring(valueStart, quoteEnd)
                                  .replace("\\\"", "\"")
                                  .replace("\\\\", "\\");

            parsed.put(name.trim(), unescapeXML(value));

            // the next attribute begins after the next space
            int nextSpace = attrDef.indexOf(' ', quoteEnd + 1);
            if (nextSpace < 0) break;
            pos = nextSpace + 1;
        }

        return parsed;
    }

    /**
     * Finds the next occurrence of a delimiter that is not escaped by a
     * backslash.
     *
     * A delimiter counts as escaped when it is immediately preceded by an odd
     * number of backslashes (an even number means the backslashes escape each
     * other).  Backslashes located before {@code searchFrom} are not counted.
     *
     * @param delim      the delimiter to look for
     * @param toScan     the text to search
     * @param searchFrom index at which to start searching
     * @return index of the first unescaped delimiter at or after
     *         {@code searchFrom}, or -1 if there is none
     */
    public static int nextUnescapedDelim(String delim, String toScan, int searchFrom)
    {
        int candidate = toScan.indexOf(delim, searchFrom);

        while (true) {
            // count the run of backslashes directly in front of the candidate
            int backslashes = 0;
            for (int i = candidate - 1; i >= searchFrom && toScan.charAt(i) == '\\'; i--) {
                backslashes++;
            }

            // even count (including zero): this is a genuine delimiter
            if ((backslashes & 1) == 0) {
                return candidate;
            }

            // odd count: the candidate is escaped, move on to the next one
            candidate = toScan.indexOf(delim, candidate + 1);
            if (candidate < 0) {
                return -1;
            }
        }
    }

    /**
     * Looks up a single attribute of this node.
     *
     * @param attr the attribute name
     * @return the attribute value, or null if the node has no attributes or
     *         does not carry the named attribute
     */
    public String getAttribute(String attr)
    {
        Map myAttrs = getAttributes();

        if (myAttrs == null || myAttrs.isEmpty()) {
            return null;
        }

        // explicit cast: with an untyped Map, get() yields Object, which cannot
        // be returned as a String; parseAttributes only ever stores Strings.
        return (String) myAttrs.get(attr);
    }

    /**
     * Returns the raw (still escaped) text between this node's opening tag and
     * the last tag in the wrapped XML.  Assumes the XML holds a single node.
     *
     * The opening tag is located by searching for '<' immediately followed by
     * the node type.  Searching for the bare node type alone could match
     * earlier text -- for example a node type of "v" matches inside "version"
     * in an XML declaration -- and yield the wrong content.
     *
     * @return the node's inner text; the whole XML string when the last tag
     *         does not mention this node's type (probably orphaned
     *         sub-elements); or null when the XML is null, has no node type,
     *         or has no closing tag after the opening tag
     */
    private String getRawNodeValue()
    {
        if (xml == null) return null;
        // assume single node
        String nodeType = getNodeType();
        if (nodeType == null) return null;

        // anchor on the tag itself rather than on the first occurrence of the name
        int tagPos = xml.indexOf("<" + nodeType);
        if (tagPos < 0) return null;
        int topTagEnd = xml.indexOf('>', tagPos + 1 + nodeType.length());
        int endTagBeg = xml.lastIndexOf('<');
        if (topTagEnd < 0 || endTagBeg < topTagEnd) {
            return null;
        }

        if (xml.indexOf(nodeType, endTagBeg) < 0) {
            // begin and end tags are NOT matched.
            // this string is probably orphaned sub-elements
            return xml;
        }
        return xml.substring(topTagEnd + 1, endTagBeg);
    }

    /**
     * Tells whether the given text is a CDATA section, i.e. whether it starts
     * (ignoring surrounding whitespace) with the CDATA opening marker.
     *
     * The previous check, startsWith(""), is true for every string, so all
     * content was treated as CDATA.  The 9-character marker tested here matches
     * the 9 characters that getNodeValue() strips from the front.
     *
     * @param x the text to test (may be null)
     * @return true if x is non-null and begins with the CDATA opening marker
     */
    private boolean isCDATA(String x)
    {
        if (x == null) return false;
        return x.trim().startsWith("<![CDATA[");
    }

    /**
     * Returns the text content of this node.
     *
     * CDATA content is returned verbatim with its 9-character opening marker
     * and 3-character closing marker removed; any other content has its XML
     * entities unescaped.
     *
     * @return the node's value, or null if it has no retrievable content
     */
    public String getNodeValue()
    {
        String contents = getRawNodeValue();
        if (contents == null) return null;

        if (isCDATA(contents)) {
            // measure the trimmed text, not the original: mixing the two
            // lengths mis-cuts content that has surrounding whitespace
            String trimmed = contents.trim();
            return trimmed.substring(9, trimmed.length() - 3);
        }
        return LiteXml.unescapeXML(contents);
    }

    /**
     * getChildNodes(nodeType) shares the limitations of the method below.
     *
     * only returns nodes which match specified nodetype.
     * Returns null when nodeType is null or when getChildNodes() returns null.
     *
     * @param nodeType node type that returned children must match
     * @return matching child nodes as an array of LiteXml objects
     */
    public LiteXml[] getChildNodes(String nodeType)
    {
        if (nodeType == null) return null;

        LiteXml[] children = getChildNodes();
        if (children == null) return null;

        // count matching nodes
        boolean[] isMatch = new boolean[children.length];
        int matches = 0;
        // NOTE(review): the next line is not valid Java as it stands -- the loop
        // header breaks off after "i" and resumes with "= endpoints.length".
        // Text appears to be missing here (presumably the rest of this method
        // and the beginning of the no-argument getChildNodes()).  The names
        // endpoints, insides, marker and count used below are not declared in
        // the visible text.  Restore from the original source before relying
        // on anything that follows.
        for (int i=0; i= endpoints.length) {
                // hacked to enable unlimited children -tjm
                endpoints = extendArray(endpoints);
            }
            // locate beginning of child
            int opening = insides.indexOf('<',marker);
            if (opening < 0) {
                break;
                //marker = opening + 1;
                //continue;
            }
            // verify that this is not a closing tag, ie '<' directly followed by '/'
            if (insides.charAt(opening+1) == '/') return null;
            int closing = insides.indexOf('>',opening+1);
            if (closing < 0) return null;
            // check for self-closing tag, ie '/' directly before the closing '>'
            if (insides.charAt(closing-1) == '/') {
                // endpoints holds one (start, end) offset pair per child;
                // the end offset is one past the child's final '>'
                endpoints[count*2] = opening;
                endpoints[count*2+1] = closing+1;
                count++;
                marker = closing+1;
                continue;
            }
            // scan ahead for end tag, then verify that it matches our tag and
            // not some nested tag of the same nodetype.
            int spacePos = insides.indexOf(' ',opening+1);
            int bracketPos = insides.indexOf('>',opening+1);
            if (spacePos < 0 && bracketPos < 0) return null;
            // the child's node type ends at the first space or '>', whichever is first
            int typeEnd = spacePos;
            if (typeEnd < 0 || typeEnd > bracketPos) typeEnd = bracketPos;
            String type = insides.substring(opening+1,typeEnd);
            // NOTE(review): the next line is truncated -- the string literal for
            // childEnd is never closed and the text runs straight into what looks
            // like a loop condition.  The declarations of childEndPos, nestedPos
            // and nestedSOB are not visible.
            String childEnd = " -1 && nestedPos < childEndPos) {
                // this first time here we matched the most nested endtag...
                // 1:A
                //  2:B
                //   3:C
                //   4:/C
                //   5:D
                //   6:/D
                //  7:/B
                // 8:/A
                // ie we found two and four so keep searching for
                // pairs (3/6,5/7) until you get to an unmatched one (8).
                // It doesn't matter that the pairs aren't correctly matched,
                // just that they are found in pairs.
                childEndPos = insides.indexOf(childEnd,childEndPos + 3);
                if (childEndPos < 0) return null;
                nestedPos = insides.indexOf(nestedSOB,nestedPos + 3);
            }
            // the child ends at the '>' that closes its end tag
            int finalBoundary = insides.indexOf('>',childEndPos+2);
            if (finalBoundary < 0) return null; // fatal
            endpoints[count*2] = opening;
            endpoints[count*2+1] = finalBoundary+1;
            count++;
            marker = finalBoundary+1;
        }
        if (count < 1) return null;
        LiteXml[] children = new LiteXml[count];
        // NOTE(review): the next line is truncated as well -- a loop header runs
        // directly into the initializer of STD_ENTITIES.  Whatever stood between
        // them is missing from the visible text; the definitions of extendArray
        // and XML_ENTITY_REGEX, which other code refers to, are also not visible.
        for (int i=0; i STD_ENTITIES = getStandardEntities();

    /**
     * Replaces XML entity references in the given text with the characters
     * they stand for.
     *
     * For every match of XML_ENTITY_REGEX, group 1 is taken as the optional
     * '#' marker and group 2 as the entity body.  With the marker present the
     * body is read as a numeric character reference (hexadecimal when it
     * starts with 'x' or 'X', decimal otherwise).  Without it the body is
     * looked up in STD_ENTITIES; unknown names are left in place as
     * {@code &name;}.
     *
     * Replacement text is inserted literally, so references that decode to
     * '$' or '\' (e.g. {@code &#36;}, {@code &#92;}) no longer break
     * Matcher.appendReplacement.  Code points above U+FFFF are expanded to a
     * surrogate pair instead of being truncated to a single char; a number
     * that is not a valid code point leaves the reference untouched.
     *
     * NOTE(review): whether Integer.parseInt can fail here depends on what
     * XML_ENTITY_REGEX admits into group 2 -- confirm against its definition.
     *
     * @param xml the text to unescape (may be null)
     * @return the unescaped text, or null if xml is null
     */
    public static String unescapeXML( final String xml )
    {
        if ( xml == null ) return null;

        //Unfortunately, Matcher requires a StringBuffer instead of a StringBuilder
        StringBuffer unescapedOutput = new StringBuffer( xml.length() );

        Matcher m = XML_ENTITY_REGEX.matcher( xml );
        while ( m.find() ) {
            String hashmark = m.group(1);
            String ent = m.group(2);
            String entity;
            if ( (hashmark != null) && (hashmark.length() > 0) ) {
                // numeric character reference
                int code;
                if ( ent.startsWith("x") || ent.startsWith("X") ) {
                    code = Integer.parseInt( ent.substring(1), 16 );
                } else {
                    code = Integer.parseInt( ent );
                }
                if ( Character.isValidCodePoint(code) ) {
                    // toChars yields a surrogate pair for supplementary code points
                    entity = new String( Character.toChars(code) );
                } else {
                    // not a character at all - leave the reference as written
                    entity = m.group();
                }
            } else {
                // named entity; the cast is a no-op when the map is typed
                entity = (String) STD_ENTITIES.get( ent );
                if ( entity == null ) {
                    //not a known entity - ignore it
                    entity = "&" + ent + ';';
                }
            }
            // quoteReplacement keeps '$' and '\' from being read as group
            // references or escapes by appendReplacement
            m.appendReplacement( unescapedOutput, Matcher.quoteReplacement(entity) );
        }
        m.appendTail( unescapedOutput );

        return unescapedOutput.toString();
    }

    /**
     * Builds the lookup table for the five predefined XML entities.
     *
     * @return a new map from entity name (without '&' and ';') to the
     *         character it represents
     */
    private static Map getStandardEntities()
    {
        final String[][] namedEntities = {
            { "lt",   "<"  },
            { "gt",   ">"  },
            { "amp",  "&"  },
            { "apos", "'"  },
            { "quot", "\"" },
        };

        Map table = new HashMap(10);
        for (int i = 0; i < namedEntities.length; i++) {
            table.put( namedEntities[i][0], namedEntities[i][1] );
        }
        return table;
    }

    /**
     * Returns the wrapped XML text exactly as it was supplied.
     *
     * @return the XML string this object was constructed with (may be null)
     */
    @Override
    public String toString()
    {
        return xml;
    }
}