All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.wicket.util.io.XmlReader Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.util.io;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.wicket.util.lang.Args;
import org.apache.wicket.util.string.Strings;


/**
 * This is a simple XmlReader. Its only purpose is to read the xml decl string from the input and
 * apply proper character encoding to all subsequent characters. The xml decl string itself is
 * removed from the output.
 * 
 * @author Juergen Donnerstag
 */
public final class XmlReader extends Reader
{
	/** Regex to find  */
	private static final Pattern xmlDecl = Pattern.compile("[\\s\\n\\r]*<\\?xml(\\s+.*)?\\?>");

	/** Regex to find  */
	private static final Pattern encodingPattern = Pattern.compile("\\s+encoding\\s*=\\s*([\"\'](.*?)[\"\']|(\\S*)).*\\?>");

	/** Null, if JVM default. Else from  */
	private String encoding;

	/** The input stream to read the data from */
	private final InputStream inputStream;

	/** The reader which does the character encoding */
	private Reader reader;

	/**
	 * Construct.
	 * 
	 * @param inputStream
	 *            The InputStream to read the xml data from
	 * @param defaultEncoding
	 *            Default character encoding to use when not specified in XML declaration, specify
	 *            null to use JVM default
	 * @throws IOException
	 *             In case something went wrong while reading the data
	 */
	public XmlReader(final InputStream inputStream, final String defaultEncoding)
		throws IOException
	{
		Args.notNull(inputStream, "inputStream");

		if (!inputStream.markSupported())
		{
			this.inputStream = new BufferedInputStream(new BOMInputStream(inputStream));
		}
		else
		{
			this.inputStream = new BOMInputStream(inputStream);
		}
		encoding = defaultEncoding;

		init();
	}

	/**
	 * Return the encoding used while reading the markup file.
	 * 
	 * @return if null, then JVM default
	 */
	public final String getEncoding()
	{
		return encoding;
	}

	/**
	 * Reads and parses markup from a resource such as file.
	 * 
	 * @throws IOException
	 */
	public void init() throws IOException
	{
		// read ahead buffer required for the first line of the markup (encoding)
		final int readAheadSize = 80;
		inputStream.mark(readAheadSize);

		// read-ahead the input stream and check if it starts with .
		String xmlDeclaration = getXmlDeclaration(inputStream, readAheadSize);
		if (!Strings.isEmpty(xmlDeclaration))
		{
			// If yes than determine the encoding from the xml decl
			encoding = determineEncoding(xmlDeclaration);
		}
		else
		{
			// If not, reset the input stream to the beginning of the file
			inputStream.reset();
		}

		if (encoding == null)
		{
			// Use JVM default
			reader = new InputStreamReader(inputStream);
		}
		else
		{
			// Use the encoding provided
			reader = new InputStreamReader(inputStream, encoding);
		}
	}

	/**
	 * Determine the encoding from the xml decl.
	 * 
	 * @param string
	 *            The xmlDecl string
	 * @return The encoding. Null, if not found
	 */
	private String determineEncoding(final CharSequence string)
	{
		// Does the string match the  pattern
		final Matcher matcher = encodingPattern.matcher(string);
		if (!matcher.find())
		{
			// No
			return null;
		}

		// Extract the encoding
		String encoding = matcher.group(2);
		if ((encoding == null) || (encoding.length() == 0))
		{
			encoding = matcher.group(3);
		}

		if (encoding != null)
		{
			encoding = encoding.trim();
		}

		return encoding;
	}

	/**
	 * Read-ahead the input stream (markup file). If the first line contains <?xml...?>, than
	 * remember the xml decl for later to determine the encoding.
	 * 

* The xml decl will not be forwarded to the user. * * @param in * The markup file * @param readAheadSize * The read ahead buffer available to read the xml encoding information * @return true, if <?xml ..?> has been found * @throws IOException */ private String getXmlDeclaration(final InputStream in, final int readAheadSize) throws IOException { // Max one line final StringBuilder pushBack = new StringBuilder(readAheadSize); // The current char from the markup file int value; while ((value = in.read()) != -1) { pushBack.append((char)value); // Stop at the end of the first tag or end of line. If it is HTML // without newlines, stop after X bytes (= characters) if ((value == '>') || (value == '\n') || (value == '\r') || (pushBack.length() >= (readAheadSize - 1))) { break; } } // Does the string match the pattern final Matcher matcher = xmlDecl.matcher(pushBack); if (!matcher.matches()) { // No return null; } // Save the whole string for later return pushBack.toString().trim(); } /** * @see java.io.Reader#close() */ @Override public void close() throws IOException { try { reader.close(); } finally { inputStream.close(); } } /** * @see java.io.Reader#read(char[], int, int) */ @Override public int read(final char[] buf, final int from, final int to) throws IOException { return reader.read(buf, from, to); } /** * @return The markup to be parsed */ @Override public String toString() { return inputStream.toString() + " (" + encoding + ")"; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy