com.xiaoleilu.hutool.json.XMLTokener Maven / Gradle / Ivy

Go to download
package com.xiaoleilu.hutool.json;

/**
 * XML分析器，继承自JSONTokener，提供XML的语法分析
 * 
 * @author JSON.org
 */
public class XMLTokener extends JSONTokener {

	/**
	 * The table of entity values. It initially contains Character values for amp, apos, gt, lt, quot.
	 */
	public static final java.util.HashMap entity;

	static {
		entity = new java.util.HashMap(8);
		entity.put("amp", XML.AMP);
		entity.put("apos", XML.APOS);
		entity.put("gt", XML.GT);
		entity.put("lt", XML.LT);
		entity.put("quot", XML.QUOT);
	}

	/**
	 * Construct an XMLTokener from a string.
	 * 
	 * @param s A source string.
	 */
	public XMLTokener(String s) {
		super(s);
	}

	/**
	 * Get the text in the CDATA block.
	 * 
	 * @return The string up to the ]]>.
	 * @throws JSONException If the ]]> is not found.
	 */
	public String nextCDATA() throws JSONException {
		char c;
		int i;
		StringBuilder sb = new StringBuilder();
		for (;;) {
			c = next();
			if (end()) {
				throw syntaxError("Unclosed CDATA");
			}
			sb.append(c);
			i = sb.length() - 3;
			if (i >= 0 && sb.charAt(i) == ']' && sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
				sb.setLength(i);
				return sb.toString();
			}
		}
	}

	/**
	 * Get the next XML outer token, trimming whitespace. 
	 * There are two kinds of tokens: the '>' character which begins a markup tag, and the content text between markup tags.
	 *
	 * @return A string, or a '>' Character, or null if there is no more source text.
	 * @throws JSONException JSON
	 */
	public Object nextContent() throws JSONException {
		char c;
		StringBuilder sb;
		do {
			c = next();
		} while (Character.isWhitespace(c));
		if (c == 0) {
			return null;
		}
		if (c == '<') {
			return XML.LT;
		}
		sb = new StringBuilder();
		for (;;) {
			if (c == '<' || c == 0) {
				back();
				return sb.toString().trim();
			}
			if (c == '&') {
				sb.append(nextEntity(c));
			} else {
				sb.append(c);
			}
			c = next();
		}
	}

	/**
	 * Return the next entity. These entities are translated to Characters: &  '  >  <  ".
	 * 
	 * @param ampersand An ampersand character.
	 * @return A Character or an entity String if the entity is not recognized.
	 * @throws JSONException If missing ';' in XML entity.
	 */
	public Object nextEntity(char ampersand) throws JSONException {
		StringBuilder sb = new StringBuilder();
		for (;;) {
			char c = next();
			if (Character.isLetterOrDigit(c) || c == '#') {
				sb.append(Character.toLowerCase(c));
			} else if (c == ';') {
				break;
			} else {
				throw syntaxError("Missing ';' in XML entity: &" + sb);
			}
		}
		String string = sb.toString();
		Object object = entity.get(string);
		return object != null ? object : ampersand + string + ";";
	}

	/**
	 * Returns the next XML meta token. This is used for skipping over <!...> and <?...?> structures.
	 * 
	 * @return Syntax characters (< > / = ! ?) are returned as Character, and strings and names are returned as Boolean. We don't care what the values actually are.
	 * @throws JSONException 字符串中属性未关闭或XML结构错误抛出此异常。If a string is not properly closed or if the XML is badly structured.
	 */
	public Object nextMeta() throws JSONException {
		char c;
		char q;
		do {
			c = next();
		} while (Character.isWhitespace(c));
		switch (c) {
			case 0:
				throw syntaxError("Misshaped meta tag");
			case '<':
				return XML.LT;
			case '>':
				return XML.GT;
			case '/':
				return XML.SLASH;
			case '=':
				return XML.EQ;
			case '!':
				return XML.BANG;
			case '?':
				return XML.QUEST;
			case '"':
			case '\'':
				q = c;
				for (;;) {
					c = next();
					if (c == 0) {
						throw syntaxError("Unterminated string");
					}
					if (c == q) {
						return Boolean.TRUE;
					}
				}
			default:
				for (;;) {
					c = next();
					if (Character.isWhitespace(c)) {
						return Boolean.TRUE;
					}
					switch (c) {
						case 0:
						case '<':
						case '>':
						case '/':
						case '=':
						case '!':
						case '?':
						case '"':
						case '\'':
							back();
							return Boolean.TRUE;
					}
				}
		}
	}

	/**
	 * Get the next XML Token. These tokens are found inside of angle brackets. It may be one of these characters: / > = ! ? or it may be a string wrapped in single quotes or double
	 * quotes, or it may be a name.
	 * 
	 * @return a String or a Character.
	 * @throws JSONException If the XML is not well formed.
	 */
	public Object nextToken() throws JSONException {
		char c;
		char q;
		StringBuilder sb;
		do {
			c = next();
		} while (Character.isWhitespace(c));
		switch (c) {
			case 0:
				throw syntaxError("Misshaped element");
			case '<':
				throw syntaxError("Misplaced '<'");
			case '>':
				return XML.GT;
			case '/':
				return XML.SLASH;
			case '=':
				return XML.EQ;
			case '!':
				return XML.BANG;
			case '?':
				return XML.QUEST;

			// Quoted string

			case '"':
			case '\'':
				q = c;
				sb = new StringBuilder();
				for (;;) {
					c = next();
					if (c == 0) {
						throw syntaxError("Unterminated string");
					}
					if (c == q) {
						return sb.toString();
					}
					if (c == '&') {
						sb.append(nextEntity(c));
					} else {
						sb.append(c);
					}
				}
			default:

				// Name

				sb = new StringBuilder();
				for (;;) {
					sb.append(c);
					c = next();
					if (Character.isWhitespace(c)) {
						return sb.toString();
					}
					switch (c) {
						case 0:
							return sb.toString();
						case '>':
						case '/':
						case '=':
						case '!':
						case '?':
						case '[':
						case ']':
							back();
							return sb.toString();
						case '<':
						case '"':
						case '\'':
							throw syntaxError("Bad character in a name");
					}
				}
		}
	}

	/**
	 * Skip characters until past the requested string. If it is not found, we are left at the end of the source with a result of false.
	 * 
	 * @param to A string to skip past.
	 * @return 是否成功skip
	 * @throws JSONException JSON异常
	 */
	public boolean skipPast(String to) throws JSONException {
		boolean b;
		char c;
		int i;
		int j;
		int offset = 0;
		int length = to.length();
		char[] circle = new char[length];

		/*
		 * First fill the circle buffer with as many characters as are in the to string. If we reach an early end, bail.
		 */

		for (i = 0; i < length; i += 1) {
			c = next();
			if (c == 0) {
				return false;
			}
			circle[i] = c;
		}

		/* We will loop, possibly for all of the remaining characters. */

		for (;;) {
			j = offset;
			b = true;

			/* Compare the circle buffer with the to string. */

			for (i = 0; i < length; i += 1) {
				if (circle[j] != to.charAt(i)) {
					b = false;
					break;
				}
				j += 1;
				if (j >= length) {
					j -= length;
				}
			}

			/* If we exit the loop with b intact, then victory is ours. */

			if (b) {
				return true;
			}

			/* Get the next character. If there isn't one, then defeat is ours. */

			c = next();
			if (c == 0) {
				return false;
			}
			/*
			 * Shove the character in the circle buffer and advance the circle offset. The offset is mod n.
			 */
			circle[offset] = c;
			offset += 1;
			if (offset >= length) {
				offset -= length;
			}
		}
	}
}