All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.wicket.markup.parser.XmlPullParser Maven / Gradle / Ivy

Go to download

Pax Wicket Service is an OSGi extension of the Wicket framework, allowing for dynamic loading and unloading of Wicket components and pageSources.

There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.markup.parser;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;

import org.apache.wicket.markup.parser.XmlTag.TagType;
import org.apache.wicket.markup.parser.XmlTag.TextSegment;
import org.apache.wicket.util.io.FullyBufferedReader;
import org.apache.wicket.util.io.IOUtils;
import org.apache.wicket.util.io.XmlReader;
import org.apache.wicket.util.lang.Args;
import org.apache.wicket.util.parse.metapattern.parsers.TagNameParser;
import org.apache.wicket.util.parse.metapattern.parsers.VariableAssignmentParser;
import org.apache.wicket.util.resource.ResourceStreamNotFoundException;
import org.apache.wicket.util.string.Strings;

/**
 * A fairly shallow markup pull parser which parses a markup string of a given type of markup (for
 * example, html, xml, vxml or wml) into ComponentTag and RawMarkup tokens.
 * 
 * @author Jonathan Locke
 * @author Juergen Donnerstag
 */
public final class XmlPullParser implements IXmlPullParser
{
	/** */
	public static final String STYLE = "style";

	/** */
	public static final String SCRIPT = "script";

	/**
	 * Reads the xml data from an input stream and converts the chars according to its encoding
	 * ()
	 */
	private XmlReader xmlReader;

	/**
	 * A XML independent reader which loads the whole source data into memory and which provides
	 * convenience methods to access the data.
	 */
	private FullyBufferedReader input;

	/** temporary variable which will hold the name of the closing tag. */
	private String skipUntilText;

	/** The last substring selected from the input */
	private CharSequence lastText;

	/** Everything in between <!DOCTYPE ... > */
	private CharSequence doctype;

	/** The type of what is in lastText */
	private HttpTagType lastType = HttpTagType.NOT_INITIALIZED;

	/** The last tag found */
	private XmlTag lastTag;

	/**
	 * Construct.
	 */
	public XmlPullParser()
	{
	}

	public final String getEncoding()
	{
		return xmlReader.getEncoding();
	}

	public final CharSequence getDoctype()
	{
		return doctype;
	}

	public final CharSequence getInputFromPositionMarker(final int toPos)
	{
		return input.getSubstring(toPos);
	}

	public final CharSequence getInput(final int fromPos, final int toPos)
	{
		return input.getSubstring(fromPos, toPos);
	}

	/**
	 * Whatever will be in between the current index and the closing tag, will be ignored (and thus
	 * treated as raw markup (text). This is useful for tags like 'script'.
	 * 
	 * @throws ParseException
	 */
	private final void skipUntil() throws ParseException
	{
		// this is a tag with non-XHTML text as body - skip this until the
		// skipUntilText is found.
		final int startIndex = input.getPosition();
		final int tagNameLen = skipUntilText.length();

		int pos = input.getPosition() - 1;
		String endTagText = null;
		int lastPos = 0;
		while (!skipUntilText.equalsIgnoreCase(endTagText))
		{
			pos = input.find("= input.size()))
			{
				throw new ParseException(
					skipUntilText + " tag not closed" + getLineAndColumnText(), startIndex);
			}

			lastPos = pos + 2;
			endTagText = input.getSubstring(lastPos, lastPos + tagNameLen).toString();
		}

		input.setPosition(pos);
		lastText = input.getSubstring(startIndex, pos);
		lastType = HttpTagType.BODY;

		// Check that the tag is properly closed
		lastPos = input.find('>', lastPos + tagNameLen);
		if (lastPos == -1)
		{
			throw new ParseException(skipUntilText + " tag not closed" + getLineAndColumnText(),
				startIndex);
		}

		// Reset the state variable
		skipUntilText = null;
	}

	/**
	 * 
	 * @return line and column number
	 */
	private String getLineAndColumnText()
	{
		return " (line " + input.getLineNumber() + ", column " + input.getColumnNumber() + ")";
	}

	/**
	 * @return XXX
	 * @throws ParseException
	 */
	public final HttpTagType next() throws ParseException
	{
		// Reached end of markup file?
		if (input.getPosition() >= input.size())
		{
			return HttpTagType.NOT_INITIALIZED;
		}

		if (skipUntilText != null)
		{
			skipUntil();
			return lastType;
		}

		// Any more tags in the markup?
		final int openBracketIndex = input.find('<');

		// Tag or Body?
		if (input.charAt(input.getPosition()) != '<')
		{
			// It's a BODY
			if (openBracketIndex == -1)
			{
				// There is no next matching tag.
				lastText = input.getSubstring(-1);
				input.setPosition(input.size());
				lastType = HttpTagType.BODY;
				return lastType;
			}

			lastText = input.getSubstring(openBracketIndex);
			input.setPosition(openBracketIndex);
			lastType = HttpTagType.BODY;
			return lastType;
		}

		// Determine the line number
		input.countLinesTo(openBracketIndex);

		// Get index of closing tag and advance past the tag
		int closeBracketIndex = -1;

		if (openBracketIndex != -1 && openBracketIndex < input.size() - 1)
		{
			char nextChar = input.charAt(openBracketIndex + 1);

			if ((nextChar == '!') || (nextChar == '?'))
				closeBracketIndex = input.find('>', openBracketIndex);
			else
				closeBracketIndex = input.findOutOfQuotes('>', openBracketIndex);
		}

		if (closeBracketIndex == -1)
		{
			throw new ParseException("No matching close bracket at" + getLineAndColumnText(),
				input.getPosition());
		}

		// Get the complete tag text
		lastText = input.getSubstring(openBracketIndex, closeBracketIndex + 1);

		// Get the tagtext between open and close brackets
		String tagText = lastText.subSequence(1, lastText.length() - 1).toString();
		if (tagText.length() == 0)
		{
			throw new ParseException("Found empty tag: '<>' at" + getLineAndColumnText(),
				input.getPosition());
		}

		// Type of the tag, to be determined next
		final TagType type;

		// If the tag ends in '/', it's a "simple" tag like 
		if (tagText.endsWith("/"))
		{
			type = TagType.OPEN_CLOSE;
			tagText = tagText.substring(0, tagText.length() - 1);
		}
		else if (tagText.startsWith("/"))
		{
			// The tag text starts with a '/', it's a simple close tag
			type = TagType.CLOSE;
			tagText = tagText.substring(1);
		}
		else
		{
			// It must be an open tag
			type = TagType.OPEN;

			// If open tag and starts with "s" like "script" or "style", than ...
			if ((tagText.length() > STYLE.length()) &&
				((tagText.charAt(0) == 's') || (tagText.charAt(0) == 'S')))
			{
				final String lowerCase = tagText.substring(0, 6).toLowerCase();
				if (lowerCase.startsWith(SCRIPT))
				{
					// prepare to skip everything between the open and close tag
					skipUntilText = SCRIPT;
				}
				else if (lowerCase.startsWith(STYLE))
				{
					// prepare to skip everything between the open and close tag
					skipUntilText = STYLE;
				}
			}
		}

		// Handle special tags like  or  or 
	 * 
	 * @param tagText
	 * @param openBracketIndex
	 * @param closeBracketIndex
	 * @throws ParseException
	 */
	protected void specialTagHandling(String tagText, final int openBracketIndex,
		int closeBracketIndex) throws ParseException
	{
		// Handle comments
		if (tagText.startsWith("!--"))
		{
			// downlevel-revealed conditional comments e.g.: 
			if (tagText.contains("![endif]--"))
			{
				lastType = HttpTagType.CONDITIONAL_COMMENT_ENDIF;

				// Move to position after the tag
				input.setPosition(closeBracketIndex + 1);
				return;
			}

			// Conditional comment? E.g.
			// ""
			if (tagText.startsWith("!--[if ") && tagText.endsWith("]"))
			{
				int pos = input.find("]-->", openBracketIndex + 1);
				if (pos == -1)
				{
					throw new ParseException("Unclosed conditional comment beginning at" +
						getLineAndColumnText(), openBracketIndex);
				}

				pos += 4;
				lastText = input.getSubstring(openBracketIndex, pos);

				// Actually it is no longer a comment. It is now
				// up to the browser to select the section appropriate.
				input.setPosition(closeBracketIndex + 1);
				lastType = HttpTagType.CONDITIONAL_COMMENT;
			}
			else
			{
				// Normal comment section.
				// Skip ahead to "-->". Note that you can not simply test for
				// tagText.endsWith("--") as the comment might contain a '>'
				// inside.
				int pos = input.find("-->", openBracketIndex + 1);
				if (pos == -1)
				{
					throw new ParseException("Unclosed comment beginning at" +
						getLineAndColumnText(), openBracketIndex);
				}

				pos += 3;
				lastText = input.getSubstring(openBracketIndex, pos);
				lastType = HttpTagType.COMMENT;
				input.setPosition(pos);
			}
			return;
		}

		// The closing tag of a conditional comment, e.g.
		// "
		// and also "
		if (tagText.equals("![endif]--"))
		{
			lastType = HttpTagType.CONDITIONAL_COMMENT_ENDIF;
			input.setPosition(closeBracketIndex + 1);
			return;
		}

		// CDATA sections might contain "<" which is not part of an XML tag.
		// Make sure escaped "<" are treated right
		if (tagText.startsWith("!["))
		{
			final String startText = (tagText.length() <= 8 ? tagText : tagText.substring(0, 8));
			if (startText.toUpperCase().equals("![CDATA["))
			{
				int pos1 = openBracketIndex;
				do
				{
					// Get index of closing tag and advance past the tag
					closeBracketIndex = findChar('>', pos1);

					if (closeBracketIndex == -1)
					{
						throw new ParseException("No matching close bracket at" +
							getLineAndColumnText(), input.getPosition());
					}

					// Get the tagtext between open and close brackets
					tagText = input.getSubstring(openBracketIndex + 1, closeBracketIndex)
						.toString();

					pos1 = closeBracketIndex + 1;
				}
				while (tagText.endsWith("]]") == false);

				// Move to position after the tag
				input.setPosition(closeBracketIndex + 1);

				lastText = tagText;
				lastType = HttpTagType.CDATA;
				return;
			}
		}

		if (tagText.charAt(0) == '?')
		{
			lastType = HttpTagType.PROCESSING_INSTRUCTION;

			// Move to position after the tag
			input.setPosition(closeBracketIndex + 1);
			return;
		}

		if (tagText.startsWith("!DOCTYPE"))
		{
			lastType = HttpTagType.DOCTYPE;

			// Get the tagtext between open and close brackets
			doctype = input.getSubstring(openBracketIndex + 1, closeBracketIndex);

			// Move to position after the tag
			input.setPosition(closeBracketIndex + 1);
			return;
		}

		// Move to position after the tag
		lastType = HttpTagType.SPECIAL_TAG;
		input.setPosition(closeBracketIndex + 1);
	}

	/**
	 * @return MarkupElement
	 */
	public final XmlTag getElement()
	{
		return lastTag;
	}

	/**
	 * @return The xml string from the last element
	 */
	public final CharSequence getString()
	{
		return lastText;
	}

	/**
	 * @return The next XML tag
	 * @throws ParseException
	 */
	public final XmlTag nextTag() throws ParseException
	{
		while (next() != HttpTagType.NOT_INITIALIZED)
		{
			switch (lastType)
			{
				case TAG :
					return lastTag;

				case BODY :
					break;

				case COMMENT :
					break;

				case CONDITIONAL_COMMENT :
					break;

				case CDATA :
					break;

				case PROCESSING_INSTRUCTION :
					break;

				case SPECIAL_TAG :
					break;
			}
		}

		return null;
	}

	/**
	 * Find the char but ignore any text within ".." and '..'
	 * 
	 * @param ch
	 *            The character to search
	 * @param startIndex
	 *            Start index
	 * @return -1 if not found, else the index
	 */
	private int findChar(final char ch, int startIndex)
	{
		char quote = 0;

		for (; startIndex < input.size(); startIndex++)
		{
			final char charAt = input.charAt(startIndex);
			if (quote != 0)
			{
				if (quote == charAt)
				{
					quote = 0;
				}
			}
			else if ((charAt == '"') || (charAt == '\''))
			{
				quote = charAt;
			}
			else if (charAt == ch)
			{
				return startIndex;
			}
		}

		return -1;
	}

	/**
	 * Parse the given string.
	 * 

* Note: xml character encoding is NOT applied. It is assumed the input provided does have the * correct encoding already. * * @param string * The input string * @throws IOException * Error while reading the resource * @throws ResourceStreamNotFoundException * Resource not found */ public void parse(final CharSequence string) throws IOException, ResourceStreamNotFoundException { parse(new ByteArrayInputStream(string.toString().getBytes()), null); } /** * Reads and parses markup from an input stream, using UTF-8 encoding by default when not * specified in XML declaration. * * @param in * The input stream to read and parse * @throws IOException * @throws ResourceStreamNotFoundException */ public void parse(final InputStream in) throws IOException, ResourceStreamNotFoundException { // When XML declaration does not specify encoding, it defaults to UTF-8 parse(in, "UTF-8"); } /** * Reads and parses markup from an input stream * * @param inputStream * The input stream to read and parse * @param encoding * The default character encoding of the input * @throws IOException */ public void parse(final InputStream inputStream, final String encoding) throws IOException { Args.notNull(inputStream, "inputStream"); try { xmlReader = new XmlReader(new BufferedInputStream(inputStream, 4000), encoding); input = new FullyBufferedReader(xmlReader); } finally { IOUtils.closeQuietly(inputStream); IOUtils.closeQuietly(xmlReader); } } public final void setPositionMarker() { input.setPositionMarker(input.getPosition()); } public final void setPositionMarker(final int pos) { input.setPositionMarker(pos); } @Override public String toString() { return input.toString(); } /** * Parses the text between tags. For example, "a href=foo.html". * * @param tag * @param tagText * The text between tags * @return false in case of an error * @throws ParseException */ private boolean parseTagText(final XmlTag tag, final String tagText) throws ParseException { // Get the length of the tagtext final int tagTextLength = tagText.length(); // If we match tagname pattern final TagNameParser tagnameParser = new TagNameParser(tagText); if (tagnameParser.matcher().lookingAt()) { // Extract the tag from the pattern matcher tag.name = tagnameParser.getName(); tag.namespace = tagnameParser.getNamespace(); // Are we at the end? Then there are no attributes, so we just // return the tag int pos = tagnameParser.matcher().end(0); if (pos == tagTextLength) { return true; } // Extract attributes final VariableAssignmentParser attributeParser = new VariableAssignmentParser(tagText); while (attributeParser.matcher().find(pos)) { // Get key and value using attribute pattern String value = attributeParser.getValue(); // In case like will the value be null if (value == null) { value = ""; } // Set new position to end of attribute pos = attributeParser.matcher().end(0); // Chop off double quotes or single quotes if (value.startsWith("\"") || value.startsWith("\'")) { value = value.substring(1, value.length() - 1); } // Trim trailing whitespace value = value.trim(); // Unescape value = Strings.unescapeMarkup(value).toString(); // Get key final String key = attributeParser.getKey(); // Put the attribute in the attributes hash if (null != tag.getAttributes().put(key, value)) { throw new ParseException("Same attribute found twice: " + key + getLineAndColumnText(), input.getPosition()); } // The input has to match exactly (no left over junk after // attributes) if (pos == tagTextLength) { return true; } } return true; } return false; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy