org.apache.wicket.protocol.http.WicketURLEncoder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.ops4j.pax.wicket.service Show documentation
Pax Wicket Service is an OSGi extension of the Wicket framework, allowing for dynamic loading and unloading of Wicket components and pageSources.
There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.protocol.http;

import java.io.CharArrayWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.BitSet;

import org.apache.wicket.Application;
import org.apache.wicket.WicketRuntimeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Adapted from java.net.URLEncoder, but defines instances for query string encoding versus URL path
 * component encoding.
 * <p>
 * The difference is important because a space is encoded as a + in a query string, whereas a + is
 * a valid value in a path component (and is therefore not decoded back to a space).
 * 
 * @author Doug Donohoe
 * @see java.net.URLEncoder
 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
 */
public class WicketURLEncoder
{
	private static final Logger log = LoggerFactory.getLogger(WicketURLEncoder.class);

	/**
	 * encoder types
	 */
	public enum Type {
		/**
		 * query type
		 */
		QUERY,
		/**
		 * path type
		 */
		PATH,
		/**
		 * full path type
		 */
		FULL_PATH;
	}

	// sentinel value of stopChar meaning "this encoder has no stop character"
	private static final char NO_STOP_CHAR = '\0';

	// upper case hexadecimal digits used when writing %XX escapes
	private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

	// set of characters that are copied to the output as-is, i.e. that must NOT be encoded
	protected BitSet dontNeedEncoding;

	// E.g. "?" for FULL_PATH encoding when querystring has already been encoded.
	// NO_STOP_CHAR ('\0') means that the whole input is encoded.
	private final char stopChar;

	// distance between a lower case letter and its upper case form; no longer used by this class
	// itself but kept because it is visible to subclasses
	protected static final int caseDiff = ('a' - 'A');

	/**
	 * Encoder used to encode name or value components of a query string.
	 * <p>
	 * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&amp;asis=thispart
	 */
	public static final WicketURLEncoder QUERY_INSTANCE = new WicketURLEncoder(Type.QUERY,
		NO_STOP_CHAR);

	/**
	 * Encoder used to encode components of a path.
	 * <p>
	 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart
	 */
	public static final WicketURLEncoder PATH_INSTANCE = new WicketURLEncoder(Type.PATH,
		NO_STOP_CHAR);

	/**
	 * Encoder used to encode all path segments. Querystring will be excluded.
	 * <p>
	 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart
	 */
	public static final WicketURLEncoder FULL_PATH_INSTANCE = new WicketURLEncoder(Type.FULL_PATH,
		'?');

	/**
	 * Allow subclass to call constructor.
	 * 
	 * @param type
	 *            encoder type; selects which additional characters are left unencoded
	 * @param stopChar
	 *            stop encoding when stopChar found; pass <code>'\0'</code> for "no stop
	 *            character", in which case the whole input is encoded
	 */
	protected WicketURLEncoder(Type type, char stopChar)
	{
		this.stopChar = stopChar;

		/*
		 * This note from java.net.URLEncoder ==================================
		 * 
		 * The list of characters that are not encoded has been determined as follows:
		 * 
		 * RFC 2396 states: ----- Data characters that are allowed in a URI but do not have a
		 * reserved purpose are called unreserved. These include upper and lower case letters,
		 * decimal digits, and a limited set of punctuation marks and symbols.
		 * 
		 * unreserved = alphanum | mark
		 * 
		 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
		 * 
		 * Unreserved characters can be escaped without changing the semantics of the URI, but this
		 * should not be done unless the URI is being used in a context that does not allow the
		 * unescaped character to appear. -----
		 * 
		 * It appears that both Netscape and Internet Explorer escape all special characters from
		 * this list with the exception of "-", "_", ".", "*". While it is not clear why they are
		 * escaping the other characters, perhaps it is safest to assume that there might be
		 * contexts in which the others are unsafe if not escaped. Therefore, we will use the same
		 * list. It is also noteworthy that this is consistent with O'Reilly's "HTML: The Definitive
		 * Guide" (page 164).
		 * 
		 * As a last note, Internet Explorer does not encode the "@" character which is clearly not
		 * unreserved according to the RFC. We are being consistent with the RFC in this matter, as
		 * is Netscape.
		 * 
		 * This bit added by Doug Donohoe ================================== RFC 3986 (2005) updates
		 * this (http://tools.ietf.org/html/rfc3986):
		 * 
		 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
		 * 
		 * pct-encoded = "%" HEXDIG HEXDIG
		 * 
		 * reserved = gen-delims / sub-delims
		 * 
		 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
		 * 
		 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
		 * 
		 * -- PATH COMPONENT --
		 * 
		 * path = (see RFC for all variations) path-abempty =( "/" segment ) segment =pchar pchar =
		 * unreserved / pct-encoded / sub-delims / ":" / "@"
		 * 
		 * -- QUERY COMPONENT --
		 * 
		 * query =( pchar / "/" / "?" )
		 */

		// unreserved
		dontNeedEncoding = new BitSet(256);
		int i;
		for (i = 'a'; i <= 'z'; i++)
		{
			dontNeedEncoding.set(i);
		}
		for (i = 'A'; i <= 'Z'; i++)
		{
			dontNeedEncoding.set(i);
		}
		for (i = '0'; i <= '9'; i++)
		{
			dontNeedEncoding.set(i);
		}
		dontNeedEncoding.set('-');
		dontNeedEncoding.set('.');
		dontNeedEncoding.set('_');
		// tilde is encoded by the java.net.URLEncoder version, but the RFC is clear on this
		dontNeedEncoding.set('~');

		// sub-delims
		dontNeedEncoding.set('!');
		dontNeedEncoding.set('$');
		// "&" needs to be encoded for query strings
		dontNeedEncoding.set('\'');
		// "(" and ")" probably don't need encoding, but we'll be conservative
		dontNeedEncoding.set('*');
		// "+" needs to be encoded for query strings (since it means a space there)
		dontNeedEncoding.set(',');
		// ";" encoded due to use in path and/or query as delim in some instances (e.g., jsessionid)
		// "=" needs to be encoded for query strings

		// pchar
		dontNeedEncoding.set(':'); // allowed and used in wicket interface params
		dontNeedEncoding.set('@');

		// encoding type-specific
		switch (type)
		{
			// this code consistent with java.net.URLEncoder version
			case QUERY :
				// a space is marked as "safe" here only so that it reaches the branch of
				// encode() that rewrites it to a '+'
				dontNeedEncoding.set(' ');
				dontNeedEncoding.set('/'); // to allow direct passing of URL in query
				dontNeedEncoding.set('?'); // to allow direct passing of URL in query
				break;

			// this added to deal with encoding a PATH component
			case PATH :
				// ' ' is not marked as safe, so it is encoded with a % instead of a + in the path
				// portion

				// path component sub-delim values we do not need to escape
				dontNeedEncoding.set('&');
				dontNeedEncoding.set('=');
				dontNeedEncoding.set('+');
				break;

			// same as path, but '/' will not be encoded
			case FULL_PATH :
				// ' ' is not marked as safe, so it is encoded with a % instead of a + in the path
				// portion

				// path component sub-delim values we do not need to escape
				dontNeedEncoding.set('&');
				dontNeedEncoding.set('=');
				dontNeedEncoding.set('+');

				dontNeedEncoding.set('/');
				break;
		}
	}

	/**
	 * Calls encode with the application response request encoding as returned by
	 * Application.get().getRequestCycleSettings().getResponseRequestEncoding(). If
	 * Application.get() fails with a WicketRuntimeException a warning is logged and UTF-8 is used
	 * instead.
	 * 
	 * @param s
	 *            Value to encode
	 * @return String encoded using default Application request/response encoding
	 */
	public String encode(String s)
	{
		Application app = null;

		try
		{
			app = Application.get();
		}
		catch (WicketRuntimeException ignored)
		{
			// deliberately ignored: without an application we fall back to UTF-8 below
			log.warn("No current Application found - defaulting encoding to UTF-8");
		}

		String encoding = "UTF-8";
		if (app != null)
		{
			encoding = app.getRequestCycleSettings().getResponseRequestEncoding();
		}
		return encode(s, encoding);
	}

	/**
	 * Percent-encodes every character of <code>s</code> that is not in this encoder's set of safe
	 * characters, using the bytes of the character in the given encoding. A space that is in the
	 * safe set (query encoding) is written as '+'.
	 * <p>
	 * If this encoder has a stop character, the first occurrence of it and everything after it is
	 * copied to the result without percent-encoding (a literal space in that remainder is still
	 * written as '+'). The stop character is honoured even when it directly follows a character
	 * that has to be encoded. An encoder created with <code>'\0'</code> has no stop character; a
	 * NUL in the input is then percent-encoded like any other unsafe character.
	 * 
	 * @param s
	 *            string to encode
	 * @param enc
	 *            encoding to use
	 * @return encoded string; the very same instance as <code>s</code> if nothing had to be changed
	 * @throws NullPointerException
	 *             if <code>s</code> or <code>enc</code> is null
	 * @throws WicketRuntimeException
	 *             wrapping an UnsupportedEncodingException if <code>enc</code> is not a legal or
	 *             supported charset name
	 * @see java.net.URLEncoder#encode(String, String)
	 */
	public String encode(String s, String enc)
	{
		final int length = s.length();

		if (enc == null)
		{
			throw new NullPointerException("charsetName");
		}

		final Charset charset;
		try
		{
			charset = Charset.forName(enc);
		}
		catch (IllegalCharsetNameException e)
		{
			throw unsupportedEncoding(enc, e);
		}
		catch (UnsupportedCharsetException e)
		{
			throw unsupportedEncoding(enc, e);
		}

		final StringBuilder out = new StringBuilder(length);
		// collects a run of consecutive characters that have to be percent-encoded together
		final CharArrayWriter pending = new CharArrayWriter();

		boolean needToChange = false;
		boolean stopEncoding = false;
		int i = 0;
		while (i < length)
		{
			int c = s.charAt(i);

			if (!stopEncoding && isStopChar(c))
			{
				stopEncoding = true;
			}

			if (stopEncoding || dontNeedEncoding.get(c))
			{
				if (c == ' ')
				{
					c = '+';
					needToChange = true;
				}
				out.append((char)c);
				i++;
				continue;
			}

			// convert to external encoding before hex conversion
			do
			{
				pending.write(c);
				/*
				 * If this character represents the start of a Unicode surrogate pair, then pass in
				 * two characters. It's not clear what should be done if a char reserved in the
				 * surrogate pairs range occurs outside of a legal surrogate pair. For now, just
				 * treat it as if it were any other character.
				 */
				if (c >= 0xD800 && c <= 0xDBFF && (i + 1) < length)
				{
					int d = s.charAt(i + 1);
					if (d >= 0xDC00 && d <= 0xDFFF)
					{
						pending.write(d);
						i++;
					}
				}
				i++;
			}
			// the run ends at the next safe character OR at the stop character; without the
			// latter check a stop character following an unsafe character would be escaped
			while (i < length && !endsEscapedRun(c = s.charAt(i)));

			byte[] bytes;
			try
			{
				bytes = new String(pending.toCharArray()).getBytes(charset.name());
			}
			catch (UnsupportedEncodingException e)
			{
				throw new WicketRuntimeException(e);
			}
			appendEscaped(out, bytes);
			pending.reset();
			needToChange = true;
		}

		return (needToChange ? out.toString() : s);
	}

	/**
	 * Tells whether the given character is this encoder's stop character.
	 * 
	 * @param c
	 *            character to test
	 * @return true if a stop character is configured and c is that character
	 */
	private boolean isStopChar(int c)
	{
		return stopChar != NO_STOP_CHAR && c == stopChar;
	}

	/**
	 * Tells whether the given character terminates a run of characters to percent-encode.
	 * 
	 * @param c
	 *            character following the run
	 * @return true if c is a safe character or the stop character
	 */
	private boolean endsEscapedRun(int c)
	{
		return dontNeedEncoding.get(c) || isStopChar(c);
	}

	/**
	 * Appends each byte as a %XX escape with upper case hexadecimal digits.
	 * 
	 * @param out
	 *            buffer to append to
	 * @param bytes
	 *            bytes to escape
	 */
	private static void appendEscaped(StringBuilder out, byte[] bytes)
	{
		for (int j = 0; j < bytes.length; j++)
		{
			out.append('%');
			out.append(HEX_DIGITS[(bytes[j] >> 4) & 0xF]);
			out.append(HEX_DIGITS[bytes[j] & 0xF]);
		}
	}

	/**
	 * Builds the exception thrown for an illegal or unsupported charset name, keeping the original
	 * failure as the cause.
	 * 
	 * @param enc
	 *            the rejected charset name
	 * @param cause
	 *            the exception raised by the charset lookup
	 * @return exception to throw
	 */
	private static WicketRuntimeException unsupportedEncoding(String enc, Throwable cause)
	{
		UnsupportedEncodingException unsupported = new UnsupportedEncodingException(enc);
		unsupported.initCause(cause);
		return new WicketRuntimeException(unsupported);
	}
}