// com.x5.util.LiteXml Maven / Gradle / Ivy
package com.x5.util;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// This bare-bones class provides bare-minimum XML parsing functionality
// and is perfect for thin clients that get a lot of very short messages
// formatted in XML. The XML must be very well-formed; any extraneous spaces
// or angle brackets will cause the parser to choke. The need to keep
// the client small outweighs the convenience that better exception handling
// would add here.
public class LiteXml
{
// The XML text wrapped by this instance, exactly as passed to the constructor.
// May be null; the accessor methods check for that and return null.
private String xml;
// Cache of the top-level tag's attributes (name -> value), filled in by
// getAttributes() the first time attributes are successfully parsed.
private Map<String,String> attrs = null;
// can handle this many child nodes in a parse for children
// before pausing to grow the endpoints array.
// nested nodes will survive ok, they don't add to the final count.
private static final int MAX_PARSE = 15;
/**
 * Wraps a piece of XML text. Nothing is parsed here; the text is only
 * stored for the accessor methods to examine later.
 *
 * @param xmlNode the XML text to wrap
 */
public LiteXml(String xmlNode)
{
    xml = xmlNode;
}
/**
 * Returns the tag name of the first element in the wrapped XML.
 *
 * Anything up to and including the first "?>" (an XML declaration) is
 * skipped. The name is taken to run from just after the first '<' up to
 * the first space or '>' that follows it. For a self-closing tag with no
 * attributes (e.g. &lt;br/&gt;) the trailing '/' is not part of the name.
 *
 * Assumes no space between '<' and the name, that the name is followed by
 * a space or '>' (not a line break), and that attribute names and values
 * do not contain '>'.
 *
 * @return the tag name, or null if the XML is null or no complete
 *         opening tag could be found
 */
public String getNodeType()
{
    if (xml == null) return null;

    // skip past the <?xml ... ?> declaration, if there is one
    int startAt = 0;
    int headerPos = xml.indexOf("?>");
    if (headerPos > -1) {
        startAt = headerPos + 2;
    }

    int begPos = xml.indexOf('<', startAt);
    if (begPos < 0) return null;

    // without a closing bracket there is no tag to name
    int endPos = xml.indexOf('>', begPos);
    if (endPos < 0) return null;

    // the name ends at the first space when the tag carries attributes
    int spacePos = xml.indexOf(' ', begPos);
    if (spacePos > -1 && spacePos < endPos) {
        endPos = spacePos;
    }

    // self-closing tag without attributes: "<name/>" -- drop the slash so
    // the result is "name" rather than "name/". Require at least one name
    // character before the slash so a bare "</>" is left untouched.
    if (endPos - 1 > begPos + 1 && xml.charAt(endPos - 1) == '/') {
        endPos--;
    }

    return xml.substring(begPos + 1, endPos);
}
/**
 * Returns the attributes of the first element in the wrapped XML as a
 * name -> value map. The map is parsed once and then cached.
 *
 * As in getNodeType(), anything up to and including the first "?>" is
 * skipped first, so the attributes of an XML declaration are never
 * mistaken for the element's own.
 *
 * Assumes attribute names and values do not contain '>'.
 *
 * @return the attribute map, or null if the XML is null, no opening tag
 *         is found, or the tag contains no space (i.e. has no attributes)
 */
public Map<String,String> getAttributes()
{
    if (xml == null) return null;
    if (this.attrs != null) return this.attrs;

    // locate the element's opening '<', past any <?xml ... ?> declaration
    int startAt = 0;
    int headerPos = xml.indexOf("?>");
    if (headerPos > -1) {
        startAt = headerPos + 2;
    }
    int begPos = xml.indexOf('<', startAt);
    if (begPos < 0) return null;

    // candidate for the end of the opening tag
    int tagEndPos = xml.indexOf('>', begPos);
    // not found? bail.
    if (tagEndPos < 0) return null;

    // the first space inside the tag separates the name from the attributes
    int spacePos = xml.indexOf(' ', begPos);
    // no attributes? bail.
    if (spacePos < 0 || spacePos > tagEndPos) return null;

    // narrow the parsing space to just this tag's attributes
    String attrDefs = xml.substring(spacePos + 1, tagEndPos);
    this.attrs = parseAttributes(attrDefs);
    return this.attrs;
}
/**
 * Parses a run of space-separated name="value" pairs.
 *
 * Values must be wrapped in double quotes. Inside a value, \" stands for
 * a double quote and \\ for a backslash; after those are resolved the
 * value is passed through unescapeXML(). Names are trimmed. Parsing stops
 * quietly at the first pair that is not properly delimited, keeping the
 * pairs collected so far.
 *
 * @param attrDef the text between the tag name and the closing '>'
 * @return a map of attribute name to decoded value; possibly empty
 */
private Map<String,String> parseAttributes(String attrDef)
{
    Map<String,String> parsed = new HashMap<String,String>();

    int cursor = 0;
    while (cursor < attrDef.length()) {
        // the name runs from the cursor up to the '=' sign
        int equalsPos = attrDef.indexOf('=', cursor);
        if (equalsPos < 0) break;
        String param = attrDef.substring(cursor, equalsPos);

        // the value starts after the first double-quote following '='
        int openQuotePos = attrDef.indexOf('"', equalsPos + 1);
        if (openQuotePos < 0) break;
        int valueStart = openQuotePos + 1;

        // ...and ends at the next double-quote that is not backslash-escaped
        int closeQuotePos = nextUnescapedDelim("\"", attrDef, valueStart);
        if (closeQuotePos < 0) break;

        // properly delimited value: undo the backslash escaping, quotes
        // first and then backslashes (plain literal replacement, no regex)
        String val = attrDef.substring(valueStart, closeQuotePos);
        val = val.replace("\\\"", "\"");
        val = val.replace("\\\\", "\\");

        parsed.put(param.trim(), unescapeXML(val));

        // the next attribute begins after the next space
        int nextSpace = attrDef.indexOf(' ', closeQuotePos + 1);
        if (nextSpace < 0) break;
        cursor = nextSpace + 1;
    }

    return parsed;
}
/**
 * Finds the next occurrence of a delimiter that is not backslash-escaped.
 *
 * An occurrence counts as escaped when an odd number of backslashes sits
 * directly in front of it; an even number (including zero) means any
 * backslashes are themselves escaped and the delimiter is real. Only
 * backslashes at or after the search start are counted.
 *
 * @param delim      the delimiter text to look for
 * @param toScan     the text to search
 * @param searchFrom index to start searching from; negative values are
 *                   treated as 0, as String.indexOf does
 * @return index of the first unescaped delimiter, or -1 if there is none
 */
public static int nextUnescapedDelim(String delim, String toScan, int searchFrom)
{
    // clamp so the backward scan for backslashes can never index below 0
    int floor = Math.max(searchFrom, 0);

    int delimPos = toScan.indexOf(delim, floor);
    while (delimPos >= 0) {
        // count the backslashes immediately preceding this candidate
        int bsCount = 0;
        while (delimPos - 1 - bsCount >= floor
                && toScan.charAt(delimPos - 1 - bsCount) == '\\') {
            bsCount++;
        }

        // even count: not escaped, this is the true delimiter
        if (bsCount % 2 == 0) {
            return delimPos;
        }

        // odd count: escaped, keep looking
        delimPos = toScan.indexOf(delim, delimPos + 1);
    }

    return -1;
}
/**
 * Looks up a single attribute of the first element in the wrapped XML.
 *
 * @param attr the attribute name
 * @return the attribute's value, or null if the element has no
 *         attributes or none by that name
 */
public String getAttribute(String attr)
{
    // wildcard type: works whether getAttributes() is declared raw or generic
    Map<?,?> myAttrs = getAttributes();
    if (myAttrs == null || myAttrs.isEmpty()) {
        return null;
    }
    // values are stored as Strings by parseAttributes()
    return (String) myAttrs.get(attr);
}
/**
 * Returns the raw text between the first element's opening tag and the
 * last tag in the wrapped XML, with no unescaping applied.
 *
 * The opening tag is located the same way getNodeType() does it (first
 * '<' after any "?>"), so text inside an XML declaration is never taken
 * for the tag name. If the last tag does not mention the node type, the
 * text is presumed to be a run of sibling elements rather than a single
 * node, and is returned whole.
 *
 * @return the inner text; the entire XML if start and end tags do not
 *         match; or null if the XML is null, has no recognizable opening
 *         tag, or has no tag after the opening one (e.g. self-closing)
 */
private String getRawNodeValue()
{
    if (xml == null) return null;

    // assume single node
    String nodeType = getNodeType();
    if (nodeType == null) return null;

    // find the opening tag itself, past any <?xml ... ?> declaration
    int startAt = 0;
    int headerPos = xml.indexOf("?>");
    if (headerPos > -1) {
        startAt = headerPos + 2;
    }
    int begPos = xml.indexOf('<', startAt);
    if (begPos < 0) return null;

    int topTagEnd = xml.indexOf('>', begPos + 1);
    int endTagBeg = xml.lastIndexOf('<');
    if (topTagEnd < 0 || endTagBeg < topTagEnd) {
        return null;
    }

    if (xml.indexOf(nodeType, endTagBeg) < 0) {
        // begin and end tags are NOT matched.
        // this string is probably orphaned sub-elements
        return xml;
    }

    return xml.substring(topTagEnd + 1, endTagBeg);
}
/**
 * Tells whether the given text, ignoring surrounding whitespace, begins
 * with a CDATA section marker.
 *
 * @param x the text to examine; may be null
 * @return true if the trimmed text starts with "&lt;![CDATA[",
 *         false otherwise (including for null)
 */
private boolean isCDATA(String x)
{
    if (x == null) return false;

    // Must test for the real marker: startsWith("") is true for every string.
    return x.trim().startsWith("<![CDATA[");
}
/**
 * Returns the text content of the first element in the wrapped XML.
 *
 * If the content is a CDATA section, the text inside the section is
 * returned verbatim, without the "&lt;![CDATA[" and "]]&gt;" wrappers.
 * Otherwise the content is returned with XML entities decoded.
 *
 * @return the node's value, or null if there is no content to return
 */
public String getNodeValue()
{
    String contents = getRawNodeValue();
    if (contents == null) return null;

    if (isCDATA(contents)) {
        final String open = "<![CDATA[";
        final String close = "]]>";
        // measure against the TRIMMED text; using the untrimmed length
        // here would cut at the wrong place whenever whitespace surrounds
        // the section
        String trimmed = contents.trim();
        if (trimmed.startsWith(open)) {
            // tolerate a missing terminator rather than chopping 3 chars blindly
            int end = trimmed.endsWith(close)
                    ? trimmed.length() - close.length()
                    : trimmed.length();
            return trimmed.substring(open.length(), end);
        }
    }

    return LiteXml.unescapeXML(contents);
}
/**
* getChildNodes(nodeType) shares the limitations of the method below.
*
* only returns nodes which match specified nodetype.
*
* @param nodeType
* @return matching child nodes as an array of LiteXml objects
*/
public LiteXml[] getChildNodes(String nodeType)
{
if (nodeType == null) return null;
LiteXml[] children = getChildNodes();
if (children == null) return null;
// count matching nodes
boolean[] isMatch = new boolean[children.length];
int matches = 0;
for (int i=0; i= endpoints.length) {
// hacked to enable unlimited children -tjm
endpoints = extendArray(endpoints);
}
// locate beginning of child
int opening = insides.indexOf('<',marker);
if (opening < 0) {
break;
//marker = opening + 1;
//continue;
}
// verify that this is not a closing tag eg
if (insides.charAt(opening+1) == '/') return null;
int closing = insides.indexOf('>',opening+1);
if (closing < 0) return null;
// check for self-closing tag eg
if (insides.charAt(closing-1) == '/') {
endpoints[count*2] = opening;
endpoints[count*2+1] = closing+1;
count++;
marker = closing+1;
continue;
}
// scan ahead for end tag, then verify that it matches our tag and
// not some nested tag of the same nodetype.
int spacePos = insides.indexOf(' ',opening+1);
int bracketPos = insides.indexOf('>',opening+1);
if (spacePos < 0 && bracketPos < 0) return null;
int typeEnd = spacePos;
if (typeEnd < 0 || typeEnd > bracketPos) typeEnd = bracketPos;
String type = insides.substring(opening+1,typeEnd);
String childEnd = "" + type;
int childEndPos = insides.indexOf(childEnd,closing+1);
String nestedSOB = "<" + type;
int nestedPos = insides.indexOf(nestedSOB,closing+1);
// handle nesting if discovered
while (nestedPos > -1 && nestedPos < childEndPos) {
// this first time here we matched the most nested endtag...
// 1:A
// 2:B
// 3:C
// 4:/C
// 5:D
// 6:/D
// 7:/B
// 8:/A
// ie we found two and four so keep searching for
// pairs (3/6,5/7) until you get to an unmatched one (8).
// It doesn't matter that the pairs aren't correctly matched,
// just that they are found in pairs.
childEndPos = insides.indexOf(childEnd,childEndPos + 3);
if (childEndPos < 0) return null;
nestedPos = insides.indexOf(nestedSOB,nestedPos + 3);
}
int finalBoundary = insides.indexOf('>',childEndPos+2);
if (finalBoundary < 0) return null; // fatal
endpoints[count*2] = opening;
endpoints[count*2+1] = finalBoundary+1;
count++;
marker = finalBoundary+1;
}
if (count < 1) return null;
LiteXml[] children = new LiteXml[count];
for (int i=0; i STD_ENTITIES = getStandardEntities();
/**
 * Replaces XML entity references in the given text with the characters
 * they stand for.
 *
 * Numeric references (decimal, or hexadecimal when the number starts with
 * x or X) are decoded to the corresponding character, including code
 * points above U+FFFF. Named references are looked up in STD_ENTITIES.
 * Unknown names and malformed or out-of-range numbers are left in the
 * output as they were written.
 *
 * NOTE(review): XML_ENTITY_REGEX is declared outside this block; from its
 * use here, group(1) is presumed to be the optional '#' and group(2) the
 * entity name or number -- confirm against the pattern.
 *
 * @param xml the text to decode; may be null
 * @return the decoded text, or null if the input was null
 */
public static String unescapeXML( final String xml )
{
    if ( xml == null ) return null;

    //Unfortunately, Matcher requires a StringBuffer instead of a StringBuilder
    StringBuffer unescapedOutput = new StringBuffer( xml.length() );
    Matcher m = XML_ENTITY_REGEX.matcher( xml );

    while ( m.find() ) {
        String hashmark = m.group(1);
        String ent = m.group(2);
        String entity;

        if ( (hashmark != null) && (hashmark.length() > 0) ) {
            entity = decodeNumericEntity( ent );
            if ( entity == null ) {
                //not a usable number - leave the reference untouched
                entity = m.group();
            }
        } else {
            entity = STD_ENTITIES.get( ent );
            if ( entity == null ) {
                //not a known entity - ignore it
                entity = "&" + ent + ';';
            }
        }

        // quote the replacement: a decoded '$' or '\' would otherwise be
        // read by appendReplacement as a group reference or an escape
        m.appendReplacement( unescapedOutput, Matcher.quoteReplacement( entity ) );
    }
    m.appendTail( unescapedOutput );

    return unescapedOutput.toString();
}

// Decodes the body of a numeric character reference ("65" or "x41") into
// its character(s); returns null when the body is not a valid code point.
private static String decodeNumericEntity( String ent )
{
    if ( ent == null || ent.length() == 0 ) return null;

    try {
        char first = ent.charAt(0);
        int code = ( first == 'x' || first == 'X' )
                 ? Integer.parseInt( ent.substring(1), 16 )
                 : Integer.parseInt( ent );
        if ( !Character.isValidCodePoint( code ) ) return null;
        // toChars yields a surrogate pair for code points above U+FFFF,
        // where a plain (char) cast would silently truncate
        return new String( Character.toChars( code ) );
    } catch ( NumberFormatException ignored ) {
        // malformed number: the caller keeps the original text
        return null;
    }
}
/**
 * Builds the lookup table for the five named entities this class
 * decodes: lt, gt, amp, apos and quot.
 *
 * @return a new map from entity name (without '&' and ';') to the
 *         character it represents
 */
private static Map<String,String> getStandardEntities()
{
    Map<String,String> entities = new HashMap<String,String>(10);
    entities.put( "lt", "<" );
    entities.put( "gt", ">" );
    entities.put( "amp", "&" );
    entities.put( "apos", "'" );
    entities.put( "quot", "\"" );
    return entities;
}
/**
 * Returns the wrapped XML text exactly as it was given to the
 * constructor.
 *
 * @return the XML text; null if this instance wraps null
 */
@Override
public String toString()
{
    return xml;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy