org.apache.wicket.markup.parser.XmlPullParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.ops4j.pax.wicket.service Show documentation
Pax Wicket Service is an OSGi extension of the Wicket framework, allowing for dynamic loading and unloading of Wicket components and pageSources.
There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.markup.parser;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;

import org.apache.wicket.markup.MarkupElement;
import org.apache.wicket.util.io.FullyBufferedReader;
import org.apache.wicket.util.io.XmlReader;
import org.apache.wicket.util.parse.metapattern.parsers.TagNameParser;
import org.apache.wicket.util.parse.metapattern.parsers.VariableAssignmentParser;
import org.apache.wicket.util.resource.ResourceStreamNotFoundException;


/**
 * A fairly shallow markup pull parser which parses a markup string of a given type of markup (for
 * example, html, xml, vxml or wml) into ComponentTag and RawMarkup tokens.
 * 
 * @author Jonathan Locke
 * @author Juergen Donnerstag
 */
public final class XmlPullParser extends AbstractMarkupFilter implements IXmlPullParser
{
	/** next() must be called at least once for the Type to be valid */
	public static final int NOT_INITIALIZED = 0;

	/**  */
	public static final int TAG = 1;

	/** Tag body in between two tags */
	public static final int BODY = 2;

	/**  */
	public static final int COMMENT = 3;

	/**  */
	public static final int CDATA = 4;

	/**  */
	public static final int PROCESSING_INSTRUCTION = 5;

	/** all other tags which look like  */
	public static final int SPECIAL_TAG = 6;

	/**
	 * Reads the xml data from an input stream and converts the chars according to its encoding
	 * ()
	 */
	private XmlReader xmlReader;

	/**
	 * A XML independent reader which loads the whole source data into memory and which provides
	 * convenience methods to access the data.
	 */
	private FullyBufferedReader input;

	/** temporary variable which will hold the name of the closing tag. */
	private String skipUntilText;

	/** The last substring selected from the input */
	private CharSequence lastText;

	/** The type of what is in lastText */
	private int lastType = NOT_INITIALIZED;

	/** If lastType == TAG, than ... */
	private XmlTag lastTag;

	/**
	 * Construct.
	 */
	public XmlPullParser()
	{
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#getEncoding()
	 */
	public String getEncoding()
	{
		return xmlReader.getEncoding();
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#getXmlDeclaration()
	 */
	public String getXmlDeclaration()
	{
		return xmlReader.getXmlDeclaration();
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#getInputFromPositionMarker(int)
	 */
	public final CharSequence getInputFromPositionMarker(final int toPos)
	{
		return input.getSubstring(toPos);
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#getInput(int, int)
	 */
	public final CharSequence getInput(final int fromPos, final int toPos)
	{
		return input.getSubstring(fromPos, toPos);
	}

	/**
	 * Whatever will be in between the current index and the closing tag, will be ignored (and thus
	 * treated as raw markup (text). This is useful for tags like 'script'.
	 * 
	 * @throws ParseException
	 */
	private final void skipUntil() throws ParseException
	{
		// this is a tag with non-XHTML text as body - skip this until the
		// skipUntilText is found.
		final int startIndex = input.getPosition();
		final int tagNameLen = skipUntilText.length();

		int pos = input.getPosition() - 1;
		String endTagText = null;
		int lastPos = 0;
		while (!skipUntilText.equalsIgnoreCase(endTagText))
		{
			pos = input.find("= input.size()))
			{
				throw new ParseException(skipUntilText + " tag not closed (line " +
					input.getLineNumber() + ", column " + input.getColumnNumber() + ")", startIndex);
			}

			lastPos = pos + 2;
			endTagText = input.getSubstring(lastPos, lastPos + tagNameLen).toString();
		}

		input.setPosition(pos);
		lastText = input.getSubstring(startIndex, pos);
		lastType = BODY;

		// Check that the tag is properly closed
		lastPos = input.find('>', lastPos + tagNameLen);
		if (lastPos == -1)
		{
			throw new ParseException("Script tag not closed (line " + input.getLineNumber() +
				", column " + input.getColumnNumber() + ")", startIndex);
		}

		// Reset the state variable
		skipUntilText = null;
	}

	/**
	 * Gets the next tag from the input string.
	 * 
	 * @return The extracted tag (will always be of type XmlTag).
	 * @throws ParseException
	 */
	public final boolean next() throws ParseException
	{
		// Reached end of markup file?
		if (input.getPosition() >= input.size())
		{
			return false;
		}

		if (skipUntilText != null)
		{
			skipUntil();
			return true;
		}

		// Any more tags in the markup?
		final int openBracketIndex = input.find('<');

		// Tag or Body?
		if (input.charAt(input.getPosition()) != '<')
		{
			if (openBracketIndex == -1)
			{
				// There is no next matching tag.
				lastText = input.getSubstring(-1);
				input.setPosition(input.size());
				lastType = BODY;
				return true;
			}

			lastText = input.getSubstring(openBracketIndex);
			input.setPosition(openBracketIndex);
			lastType = BODY;
			return true;
		}

		// Determine the line number
		input.countLinesTo(openBracketIndex);

		// Get index of closing tag and advance past the tag
		int closeBracketIndex = input.find('>', openBracketIndex + 1);
		if (closeBracketIndex == -1)
		{
			throw new ParseException("No matching close bracket at position " + openBracketIndex,
				input.getPosition());
		}

		// Get the complete tag text
		lastText = input.getSubstring(openBracketIndex, closeBracketIndex + 1);

		// Get the tagtext between open and close brackets
		String tagText = lastText.subSequence(1, lastText.length() - 1).toString();
		if (tagText.length() == 0)
		{
			throw new ParseException("Found empty tag: '<>' at position " + openBracketIndex,
				input.getPosition());
		}

		// Handle special tags like  or  or 
	 * 
	 * @param tagText
	 * @param openBracketIndex
	 * @param closeBracketIndex
	 * @throws ParseException
	 */
	private void specialTagHandling(String tagText, final int openBracketIndex,
		int closeBracketIndex) throws ParseException
	{
		// Handle comments
		if (tagText.startsWith("!--"))
		{
			// Normal comment section.
			// Skip ahead to "-->". Note that you can not simply test for
			// tagText.endsWith("--") as the comment might contain a '>'
			// inside.
			int pos = input.find("-->", openBracketIndex + 1);
			if (pos == -1)
			{
				throw new ParseException("Unclosed comment beginning at line:" +
					input.getLineNumber() + " column:" + input.getColumnNumber(), openBracketIndex);
			}

			pos += 3;
			lastText = input.getSubstring(openBracketIndex, pos);
			lastType = COMMENT;

			// Conditional comment? 
			if (tagText.startsWith("!--[if ") && tagText.endsWith("]") &&
				lastText.toString().endsWith(""))
			{
				// Actually it is no longer a comment. It is now
				// up to the browser to select the section appropriate.
				input.setPosition(closeBracketIndex + 1);
			}
			else
			{
				input.setPosition(pos);
			}
			return;
		}

		// The closing tag of a conditional comment 
		if (tagText.equals("![endif]--"))
		{
			lastType = COMMENT;
			input.setPosition(closeBracketIndex + 1);
			return;
		}

		// CDATA sections might contain "<" which is not part of an XML tag.
		// Make sure escaped "<" are treated right
		if (tagText.startsWith("!["))
		{
			final String startText = (tagText.length() <= 8 ? tagText : tagText.substring(0, 8));
			if (startText.toUpperCase().equals("![CDATA["))
			{
				int pos1 = openBracketIndex;
				do
				{
					// Get index of closing tag and advance past the tag
					closeBracketIndex = findChar('>', pos1);

					if (closeBracketIndex == -1)
					{
						throw new ParseException("No matching close bracket at line:" +
							input.getLineNumber() + " column:" + input.getColumnNumber(),
							input.getPosition());
					}

					// Get the tagtext between open and close brackets
					tagText = input.getSubstring(openBracketIndex + 1, closeBracketIndex)
						.toString();

					pos1 = closeBracketIndex + 1;
				}
				while (tagText.endsWith("]]") == false);

				// Move to position after the tag
				input.setPosition(closeBracketIndex + 1);

				lastText = tagText;
				lastType = CDATA;
				return;
			}
		}

		if (tagText.charAt(0) == '?')
		{
			lastType = PROCESSING_INSTRUCTION;

			// Move to position after the tag
			input.setPosition(closeBracketIndex + 1);
			return;
		}

		// Move to position after the tag
		lastType = SPECIAL_TAG;
		input.setPosition(closeBracketIndex + 1);
	}

	/**
	 * Gets the next tag from the input string.
	 * 
	 * @return The extracted tag (will always be of type XmlTag).
	 * @throws ParseException
	 */
	public final MarkupElement nextTag() throws ParseException
	{
		while (next())
		{
			switch (lastType)
			{
				case TAG :
					return lastTag;

				case BODY :
					break;

				case COMMENT :
					break;

				case CDATA :
					break;

				case PROCESSING_INSTRUCTION :
					break;

				case SPECIAL_TAG :
					break;
			}
		}

		return null;
	}

	/**
	 * Find the char but ignore any text within ".." and '..'
	 * 
	 * @param ch
	 *            The character to search
	 * @param startIndex
	 *            Start index
	 * @return -1 if not found, else the index
	 */
	private int findChar(final char ch, int startIndex)
	{
		char quote = 0;

		for (; startIndex < input.size(); startIndex++)
		{
			final char charAt = input.charAt(startIndex);
			if (quote != 0)
			{
				if (quote == charAt)
				{
					quote = 0;
				}
			}
			else if ((charAt == '"') || (charAt == '\''))
			{
				quote = charAt;
			}
			else if (charAt == ch)
			{
				return startIndex;
			}
		}

		return -1;
	}

	/**
	 * Parse the given string.
	 * 
	 * Note: xml character encoding is NOT applied. It is assumed the input provided does have the
	 * correct encoding already.
	 * 
	 * @param string
	 *            The input string
	 * @throws IOException
	 *             Error while reading the resource
	 * @throws ResourceStreamNotFoundException
	 *             Resource not found
	 */
	public void parse(final CharSequence string) throws IOException,
		ResourceStreamNotFoundException
	{
		parse(new ByteArrayInputStream(string.toString().getBytes()), null);
	}

	/**
	 * Reads and parses markup from an input stream, using UTF-8 encoding by default when not
	 * specified in XML declaration.
	 * 
	 * @param in
	 *            The input stream to read and parse
	 * @throws IOException
	 * @throws ResourceStreamNotFoundException
	 */
	public void parse(final InputStream in) throws IOException, ResourceStreamNotFoundException
	{
		// When XML declaration does not specify encoding, it defaults to UTF-8
		parse(in, "UTF-8");
	}

	/**
	 * Reads and parses markup from an input stream
	 * 
	 * @param inputStream
	 *            The input stream to read and parse
	 * @param encoding
	 *            The default character encoding of the input
	 * @throws IOException
	 * @throws ResourceStreamNotFoundException
	 */
	public void parse(final InputStream inputStream, final String encoding) throws IOException,
		ResourceStreamNotFoundException
	{
		try
		{
			xmlReader = new XmlReader(new BufferedInputStream(inputStream, 4000), encoding);
			input = new FullyBufferedReader(xmlReader);
		}
		finally
		{
			inputStream.close();
			if (xmlReader != null)
			{
				xmlReader.close();
			}
		}
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#setPositionMarker()
	 */
	public final void setPositionMarker()
	{
		input.setPositionMarker(input.getPosition());
	}

	/**
	 * 
	 * @see org.apache.wicket.markup.parser.IXmlPullParser#setPositionMarker(int)
	 */
	public final void setPositionMarker(final int pos)
	{
		input.setPositionMarker(pos);
	}

	/**
	 * 
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString()
	{
		return input.toString();
	}

	/**
	 * Parses the text between tags. For example, "a href=foo.html".
	 * 
	 * @param tagText
	 *            The text between tags
	 * @return A new Tag object or null if the tag is invalid
	 * @throws ParseException
	 */
	private XmlTag parseTagText(final String tagText) throws ParseException
	{
		// Get the length of the tagtext
		final int tagTextLength = tagText.length();

		// If we match tagname pattern
		final TagNameParser tagnameParser = new TagNameParser(tagText);
		if (tagnameParser.matcher().lookingAt())
		{
			final XmlTag tag = new XmlTag();

			// Extract the tag from the pattern matcher
			tag.name = tagnameParser.getName();
			tag.namespace = tagnameParser.getNamespace();

			// Are we at the end? Then there are no attributes, so we just
			// return the tag
			int pos = tagnameParser.matcher().end(0);
			if (pos == tagTextLength)
			{
				return tag;
			}

			// Extract attributes
			final VariableAssignmentParser attributeParser = new VariableAssignmentParser(tagText);
			while (attributeParser.matcher().find(pos))
			{
				// Get key and value using attribute pattern
				String value = attributeParser.getValue();

				// In case like  will the value be null
				if (value == null)
				{
					value = "";
				}

				// Set new position to end of attribute
				pos = attributeParser.matcher().end(0);

				// Chop off double quotes or single quotes
				if (value.startsWith("\"") || value.startsWith("\'"))
				{
					value = value.substring(1, value.length() - 1);
				}

				// Trim trailing whitespace
				value = value.trim();

				// Get key
				final String key = attributeParser.getKey();

				// Put the attribute in the attributes hash
				if (null != ((TagAttributes)tag.getAttributes()).putInternal(key, value))
				{
					throw new ParseException("Same attribute found twice: " + key,
						input.getPosition());
				}

				// The input has to match exactly (no left over junk after
				// attributes)
				if (pos == tagTextLength)
				{
					return tag;
				}
			}

			return tag;
		}

		return null;
	}
}