// org.apache.wicket.util.io.XmlReader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.wicket.util.io;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.wicket.util.lang.Args;
import org.apache.wicket.util.string.Strings;
/**
 * This is a simple XmlReader. Its only purpose is to read the xml decl string from the input and
 * apply proper character encoding to all subsequent characters. The xml decl string itself is
 * removed from the output.
 * <p>
 * The encoding is chosen in this order: the {@code encoding} attribute of a leading
 * {@code <?xml ... ?>} declaration, then the default encoding passed to the constructor, then the
 * JVM default.
 *
 * @author Juergen Donnerstag
 */
public final class XmlReader extends Reader
{
    /** Regex to find {@code <?xml ... ?>} */
    private static final Pattern xmlDecl = Pattern.compile("[\\s\\n\\r]*<\\?xml(\\s+.*)?\\?>");

    /** Regex to find the {@code encoding} attribute inside {@code <?xml ... ?>} */
    private static final Pattern encodingPattern = Pattern.compile("\\s+encoding\\s*=\\s*([\"\'](.*?)[\"\']|(\\S*)).*\\?>");

    /** Null, if JVM default. Else from {@code <?xml encoding="..."?>} or the constructor default */
    private String encoding;

    /** The input stream to read the data from */
    private final InputStream inputStream;

    /** The reader which does the character encoding */
    private Reader reader;

    /**
     * Construct.
     *
     * @param inputStream
     *            The InputStream to read the xml data from
     * @param defaultEncoding
     *            Default character encoding to use when not specified in XML declaration, specify
     *            null to use JVM default
     * @throws IOException
     *             In case something went wrong while reading the data
     */
    public XmlReader(final InputStream inputStream, final String defaultEncoding)
        throws IOException
    {
        Args.notNull(inputStream, "inputStream");

        // init() relies on mark()/reset(); add a buffering layer if the caller's stream
        // does not support them.
        if (!inputStream.markSupported())
        {
            this.inputStream = new BufferedInputStream(new BOMInputStream(inputStream));
        }
        else
        {
            this.inputStream = new BOMInputStream(inputStream);
        }

        encoding = defaultEncoding;
        init();
    }

    /**
     * Return the encoding used while reading the markup file.
     *
     * @return if null, then JVM default
     */
    public String getEncoding()
    {
        return encoding;
    }

    /**
     * Reads the beginning of the input, consumes a leading {@code <?xml ... ?>} declaration if
     * there is one, and creates the character-decoding reader.
     * <p>
     * The constructor already calls this method. Calling it again re-runs the detection from the
     * stream's current position and replaces the internal reader.
     *
     * @throws IOException
     *             if reading or resetting the stream fails, or the encoding is not supported
     */
    public void init() throws IOException
    {
        // read ahead buffer required for the first line of the markup (encoding)
        final int readAheadSize = 80;
        inputStream.mark(readAheadSize);

        // read-ahead the input stream and check if it starts with <?xml ... ?>.
        final String xmlDeclaration = getXmlDeclaration(inputStream, readAheadSize);
        if (!Strings.isEmpty(xmlDeclaration))
        {
            // The declaration stays consumed, which is how it is removed from the output.
            // Only override the caller's default when the declaration really names an
            // encoding; a bare <?xml version="1.0"?> must not silently fall back to the
            // JVM default.
            final String declaredEncoding = determineEncoding(xmlDeclaration);
            if (declaredEncoding != null)
            {
                encoding = declaredEncoding;
            }
        }
        else
        {
            // No declaration: reset the input stream to the beginning of the file
            inputStream.reset();
        }

        if (encoding == null)
        {
            // Use JVM default
            reader = new InputStreamReader(inputStream);
        }
        else
        {
            // Use the encoding provided
            reader = new InputStreamReader(inputStream, encoding);
        }
    }

    /**
     * Determine the encoding from the xml decl.
     *
     * @param string
     *            The xmlDecl string
     * @return The trimmed encoding. Null, if not found or blank
     */
    private String determineEncoding(final CharSequence string)
    {
        final Matcher matcher = encodingPattern.matcher(string);
        if (!matcher.find())
        {
            return null;
        }

        // group(2) holds a quoted value, group(3) an unquoted one
        String declared = matcher.group(2);
        if ((declared == null) || declared.isEmpty())
        {
            declared = matcher.group(3);
        }
        if (declared == null)
        {
            return null;
        }

        declared = declared.trim();
        return declared.isEmpty() ? null : declared;
    }

    /**
     * Read-ahead the input stream (markup file). If the first line contains
     * {@code <?xml ... ?>}, then return the xml decl so the encoding can be determined from it.
     * <p>
     * Reading stops after the first {@code '>'}, the first line break, or
     * {@code readAheadSize - 1} bytes, whichever comes first. A line break before the
     * declaration therefore ends the read-ahead early and no declaration is reported.
     * <p>
     * The stream is not reset here; the caller resets it when no declaration was found.
     *
     * @param in
     *            The markup file
     * @param readAheadSize
     *            The read ahead buffer available to read the xml encoding information
     * @return the trimmed xml decl, or null if the input does not start with one
     * @throws IOException
     *             if reading from the stream fails
     */
    private String getXmlDeclaration(final InputStream in, final int readAheadSize)
        throws IOException
    {
        // Max one line
        final StringBuilder pushBack = new StringBuilder(readAheadSize);

        // The current byte from the markup file
        int value;
        while ((value = in.read()) != -1)
        {
            pushBack.append((char)value);

            // Stop at the end of the first tag or end of line. If it is HTML
            // without newlines, stop after X bytes (= characters)
            if ((value == '>') || (value == '\n') || (value == '\r') ||
                (pushBack.length() >= (readAheadSize - 1)))
            {
                break;
            }
        }

        // Does the string match the <?xml ... ?> pattern
        final Matcher matcher = xmlDecl.matcher(pushBack);
        if (!matcher.matches())
        {
            return null;
        }

        // Hand back the whole <?xml ... ?> string
        return pushBack.toString().trim();
    }

    /**
     * Closes the decoding reader and then the underlying input stream. The stream is closed even
     * if closing the reader throws.
     *
     * @see java.io.Reader#close()
     */
    @Override
    public void close() throws IOException
    {
        try
        {
            reader.close();
        }
        finally
        {
            inputStream.close();
        }
    }

    /**
     * Delegates to the decoding reader. Despite their names, {@code from} and {@code to} are
     * passed on as the offset and length arguments of {@link Reader#read(char[], int, int)}.
     *
     * @see java.io.Reader#read(char[], int, int)
     */
    @Override
    public int read(final char[] buf, final int from, final int to) throws IOException
    {
        return reader.read(buf, from, to);
    }

    /**
     * @return the string form of the underlying input stream followed by the encoding in
     *         parentheses
     */
    @Override
    public String toString()
    {
        return inputStream.toString() + " (" + encoding + ")";
    }
}