All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.myfaces.trinidadinternal.share.url.EncoderUtils Maven / Gradle / Ivy

The newest version!
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.myfaces.trinidadinternal.share.url;

import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.IOException;
import java.io.Writer;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;

import java.util.BitSet;

import org.apache.myfaces.trinidad.util.ThreadLocalUtils;

/**
 * Utility functions for encoding URLs for output.
 * This class contains three classes of encoding functions:
 * 
    *
  1. Functions that encode entire URLs for final output. The * UIX rendering layer automatically uses these functions, * so it is generally not necessary. *
      *
    • {@link #encodeURL encodeURL} *
    • {@link #writeURL writeURL} *
    *
  2. Functions that encode query parameters to be handed * off to UIX Components (aka Marlin beans or UIX rendering). *
      *
    • {@link #encodeUIXQueryParameter encodeUIXQueryParameter} *
    • {@link #appendUIXQueryParameter appendUIXQueryParameter} *
    *
  3. Functions that encode query parameters for final output. * These should be used only when the parameters are not * being rendered with UIX (or UIX Components, aka Marlin Beans). *
      *
    • {@link #encodeString encodeString} *
    *
*

The major difference between these types of functions * is what characters they will encode. *

Category 1 (entire URLs) * will perform correct NLS encodings, but cannot encode * '%', '&' '=', '?', '#', and '+', as all of these characters * have very specific meanings in an URL. In the case of '%' and '+' * note that this means UIX will never double-encode an URL. * *

Category 2 (parameters for UIX) will encode only * '%', '+', '&', and '#'. Note that it does not perform * NLS encodings, or any disallowed URL characters (like the space * character), as it relies on the UIX rendering layer to * handle these where it can be performed more efficiently. * Also, note that '+' and '%' are encoded, so this function * should not be used on parameters that have already been URL encoded. *

* Finally, Category 3 (parameters for final output) encodes * all characters except for '-', '_', '.', and '/'. There is * no need to call these functions if you'll be using UIX (or * Marlin beans) to render the URL. *

* EncoderUtils also includes support for manually decoding strings. * This is generally not necessary, as Servlets are generally responsible * for decoding parameters themselves, but developers working around * buggy servlet engines may find this useful, as will developers * writing their own HTTP code. *

* @version $Name: $ ($Revision: adfrt/faces/adf-faces-impl/src/main/java/oracle/adfinternal/view/faces/share/url/EncoderUtils.java#0 $) $Date: 15-nov-2005.19:26:40 $ */ public class EncoderUtils { /** * Encodes a string into URL-encoded format. For details * on exactly how this method works, please see the {@link #writeURL} * method. *

* @param text the unencoded (or partially encoded) String * @param queryEncoding the character set encoding for after the first * question mark * @param useISOForBody if false, text before the first question * mark will be encoded with UTF-8; if true, encoded with ISO-8859-1. * @return the encoded string */ static public String encodeURL( String text, String queryEncoding, boolean useISOForBody) throws UnsupportedEncodingException { StringWriter sw = new StringWriter(text.length()); try { writeURL(sw, text, queryEncoding, useISOForBody); } catch (UnsupportedEncodingException uee) { // Catch and rethrow - this exception can happen throw uee; } catch (IOException ioe) { // This should never happen - StringWriters don't throw anything! assert(false); } return sw.toString(); } /** * Writes a string into URL-encoded format out to a Writer. *

* All characters before the start of the query string will be encoded * using either UTF-8 or ISO-8859-1. The former is the ideal standard, * as it allows any Unicode character to be used in the path. However, * many web servers assume ISO-8859-1 instead, hence the option. *

* Characters after the start of the query string will be encoded * using a client-defined encoding. You'll need to use the encoding * that the server will expect. (HTML forms will generate query * strings using the character encoding that the HTML itself was * generated in.) *

* All characters will be encoded as needed for URLs, with the exception * of the percent symbol ("%"). Because this is the character * itself used for escaping, attempting to escape this character * would cause this code to double-escape some strings. It also may * be necessary to pre-escape some characters. In particular, a * question mark ("?") is considered the start of the query string. *

* @param out a Writer for the output * @param text the unencoded (or partially encoded) String * @param queryEncoding the character set encoding for after the first * question mark * @param useISOForBody if false, text before the first question * mark will be encoded with UTF-8; if true, encoded with ISO-8859-1. */ static public void writeURL( Writer out, String text, String queryEncoding, boolean useISOForBody) throws IOException, UnsupportedEncodingException { _writeURL(out, text, queryEncoding, useISOForBody, 0, false, false); } /** * A clone of writeURL(), but applying rules knowing that it * is outputting HTML. */ static public void writeURLForHTML( Writer out, String text, String queryEncoding, boolean useISOForBody) throws IOException, UnsupportedEncodingException { _writeURL(out, text, queryEncoding, useISOForBody, 0, false, true); } /** * Writes a query parameter. Very few clients will * need to use this method - most will just call * writeURL or encodeURL. */ static public void writeQueryParameters( Writer out, String text, String encoding, int start, boolean forHtml) throws IOException, UnsupportedEncodingException { _encodeString(out, text, encoding, start, _DONT_ENCODE_SET, true, forHtml); } /** * Writes a query parameter to the String Builder */ static public void writeQueryParametersToStringBuilder( StringBuilder sbout, char[] cArray, String encoding, int start, boolean forHtml) throws IOException, UnsupportedEncodingException { _encodeStringToStringBuilder(sbout, cArray, encoding, start, _DONT_ENCODE_SET, true, forHtml); } /** * Partially encodes a query parameter. The characters '%', * '#', '&',. and '+' will be encoded, but no other * characters will be. This method should only be used * for encoding strings that will be passed to the UIX rendering * layer, because it does not perform the pieces of URL encoding * automatically handled in that layer. */ static public String encodeUIXQueryParameter(String text) { int length = text.length(); for (int i = 0; i < length; i++) { char ch = text.charAt(i); if (_isSpecialQueryParameterChar(ch)) { // Append the entire string, then trim back to only // include the first "i" characters that don't need // to be escaped. This optimization avoids the allocation // from calling substring(). Also, the built-in overallocation // of StringBuffer by 16 will obviate the need for reallocation // of the buffer as long as there's five or fewer "special" characters StringBuffer buffer = new StringBuffer(text); buffer.setLength(i); _appendUIXQueryParameter(buffer, text, i); return new String(buffer); } } return text; } public static String appendURLArguments( StringBuffer buffer, String baseURL, String[] keysAndValues ) { // Bug 1814825: the anchor has to stay on the end. int anchorIndex = baseURL.indexOf('#'); if (anchorIndex >= 0) buffer.append(baseURL.substring(0, anchorIndex)); else buffer.append(baseURL); boolean queryAppended = (baseURL.indexOf('?') >= 0); for (int i = 0; i < keysAndValues.length; i += 2) { String value = keysAndValues[i+1]; if (value != null) { // only append '?' at start if the URL doesn't already contain // arguments if (!queryAppended) { queryAppended = true; buffer.append('?'); } else buffer.append('&'); buffer.append(keysAndValues[i]); buffer.append('='); buffer.append(value); } } String beforeEncode = buffer.toString(); return beforeEncode; } public static String appendURLArguments( String baseURL, String[] keysAndValues ) { // buffer length = base + separators + keys + values int bufferLength = baseURL.length() + keysAndValues.length; for (int i = 0; i < keysAndValues.length; i +=2) { String value = keysAndValues[i + 1]; if (value != null) { bufferLength += keysAndValues[i].length() + value.length(); } } StringBuffer buffer = new StringBuffer(bufferLength); return appendURLArguments(buffer, baseURL, keysAndValues); } /** * Partially encodes a query parameter. The characters '%', * '#', '&',. and '+' will be encoded, but no other * characters will be. This method should only be used * for encoding strings that will be passed to the UIX rendering * layer, because it does not perform the pieces of URL encoding * automatically handled in that layer. */ static public void appendUIXQueryParameter( StringBuffer buffer, String text) { buffer.append(text); int length = text.length(); for (int i = 0; i < length; i++) { char ch = text.charAt(i); if (_isSpecialQueryParameterChar(ch)) { // Peel back all the characters that may need to be encoded // This is an optimization that assumes that the vast majority // of values won't need to encoded at all. buffer.setLength(buffer.length() - (length - i)); _appendUIXQueryParameter(buffer, text, i); break; } } } static private void _writeURL( Writer out, String text, String queryEncoding, boolean useISOForBody, int start, boolean inAnchor, boolean forHtml) throws IOException, UnsupportedEncodingException { int length = text.length(); //Get the shared String Builder StringBuilder sbout = __getSharedStringBuilder(length * 3/2); //Convert to char array for faster access char[] cArray = text.toCharArray(); //Write the URL to the StringBuilder _writeURLtoStringBuilder(sbout, cArray, queryEncoding, useISOForBody, start, inAnchor, forHtml); //Write to the writer out.write(sbout.toString()); } static private void _writeURLtoStringBuilder( StringBuilder sbout, char[] cArray, String queryEncoding, boolean useISOForBody, int start, boolean inAnchor, boolean forHtml) throws IOException, UnsupportedEncodingException { int length = cArray.length; for (int i = start; i < length; i++) { char ch = cArray[i]; // Following code derived from Xalan 2.0 SerializerToHTML, // Apache Software Foundation, http://www.apache.org/ if ((ch < 33) || (ch > 126)) { if (useISOForBody) { // ISO-8859-1. Blindly assume the character will be < 255. // Not much we can do if it isn't. _writeDoubleHex(sbout, ch); } else { // Following code derived from Xalan 2.0 SerializerToHTML, // Encode UTF16 to UTF8. // Reference is Unicode, A Primer, by Tony Graham. // Page 92. if (ch <= 0x7F) { _writeDoubleHex(sbout, ch); } else if (ch <= 0x7FF) { // Clear low 6 bits before rotate, put high 4 bits in low byte, // and set two high bits. int high = (ch >> 6) | 0xC0; int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit _writeDoubleHex(sbout, high); _writeDoubleHex(sbout, low); } // high surrogate else if ((ch & 0xFC00) == 0xD800) { // Clear high 6 bits. int highSurrogate = ch & 0x03FF; // Middle 4 bits (wwww) + 1 // "Note that the value of wwww from the high surrogate bit // pattern is incremented to make the uuuuu bit pattern in // the scalar value so the surrogate pair don't address the BMP." int wwww = ((highSurrogate & 0x03C0) >> 6); int uuuuu = wwww+1; // next 4 bits int zzzz = (highSurrogate & 0x003C) >> 2; // low 2 bits int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; // Get low surrogate character. i++; if (i < length) ch = cArray[i]; else // =-=AEW Shouldn't happen ch = 0; // Clear high 6 bits. int lowSurrogate = ch & 0x03FF; // put the middle 4 bits into the bottom of yyyyyy (byte 3) yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); // bottom 6 bits. int xxxxxx = (lowSurrogate & 0x003F); int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu int byte2 = 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; int byte3 = 0x80 | yyyyyy; int byte4 = 0x80 | xxxxxx; _writeDoubleHex(sbout, byte1); _writeDoubleHex(sbout, byte2); _writeDoubleHex(sbout, byte3); _writeDoubleHex(sbout, byte4); } else { int high = (ch >> 12) | 0xE0; // top 4 bits int middle = ((ch & 0x0FC0) >> 6) | 0x80; // middle 6 bits int low = (ch & 0x3F) | 0x80; // First 6 bits, + high bit _writeDoubleHex(sbout, high); _writeDoubleHex(sbout, middle); _writeDoubleHex(sbout, low); } } } /* =-=AEW DO NOT encode '%'. If you do, then for starters, we'll double-encode anything that's pre-encoded. And, what's worse, there becomes no way to use characters that must be encoded if you don't want them to be interpreted, like '?' or '&'. else if('%' == ch) { _writeDoubleHex(out, ch); } */ else if (ch == '"') { sbout.append("%22"); } else if (ch == '?') { if (inAnchor) { sbout.append("%3F"); } else { // AEW: And - everything in the query parameters is interpreted // as if it were in the request's character set. So use // the real encoding for those! sbout.append('?'); writeQueryParametersToStringBuilder(sbout, cArray, queryEncoding, i + 1, forHtml); return; } } else if (ch == '&') { if (forHtml) sbout.append("&"); else sbout.append(ch); } else { sbout.append(ch); } } } static private boolean _isSpecialQueryParameterChar(char ch) { return ((ch == '%') || (ch == '+') || (ch == '&') || (ch == '#')); } static private void _appendUIXQueryParameter( StringBuffer buffer, String text, int from) { int length = text.length(); for (int j = from; j < length; j++) { char c = text.charAt(j); switch (c) { case '#': buffer.append("%23"); break; case '&': buffer.append("%26"); break; case '%': buffer.append("%25"); break; case '+': buffer.append("%2b"); break; default: buffer.append(c); break; } } } /** * Fully encodes a string - all characters with * special meanings in an URL will be '%' encoded. This * means that this function can double-encode strings * that are already encoded, and will cause serious * problems if used for an entire URL, since it'll * escape characters like '?' and '&'. Generally, * this function should be used only for encoding * single query values. */ static public String encodeString( String text, String encoding) throws UnsupportedEncodingException { StringWriter sw = new StringWriter(text.length()); try { _encodeString(sw, text, encoding, 0, _DONT_ENCODE_SET_SMALL, false, false); } catch (UnsupportedEncodingException uee) { // Catch and rethrow - this exception can happen throw uee; } catch (IOException ioe) { // This should never happen - StringWriters don't throw anything! assert(false); } return sw.toString(); } /** * URL-decodes a string. This function assumes * that (for query parameters) * CaboHttpUtils.decodeRequestParameter() has already * been called, and this function is being passed the result * of that function. That is: if the string does not * contain any '%' characters, the string will be returned unmodified. *

* @see org.apache.myfaces.trinidadinternal.share.util.CaboHttpUtils#decodeRequestParameter * @param text the original text * @param encoding the character encoding to assume */ static public String decodeString( String text, String encoding) throws UnsupportedEncodingException { int firstPercent = text.indexOf('%'); // No '%'s - just return. if (firstPercent < 0) return text; int length = text.length(); StringBuffer buffer = new StringBuffer(length); // Slam in everything up to the first percent, // then trim back. This is assumed to be faster than // calling substring(). buffer.append(text); buffer.setLength(firstPercent); byte[] bytes = null; int byteIndex = 0; for (int i = firstPercent; i < length; i++) { char ch = text.charAt(i); // If it's a normal character, just append if (ch != '%') { // But flush the byte buffer first. if (byteIndex > 0) { buffer.append(new String(bytes, 0, byteIndex, encoding)); byteIndex = 0; } buffer.append(ch); } else { // We're after a percent; add to the byte-buffer. if (i + 2 < length) { byte b = (byte) ((_fromHex(text.charAt(i + 1)) << 4) | _fromHex(text.charAt(i + 2))); // No encoding - blindly assume ISO-8859-1 (the Servlet default) if (encoding == null) { buffer.append((char) b); } else { if (bytes == null) { bytes = new byte[10]; } else if (bytes.length == byteIndex) { byte[] newBytes = new byte[byteIndex * 2]; System.arraycopy(bytes, 0, newBytes, 0, byteIndex); bytes = newBytes; } bytes[byteIndex] = b; byteIndex++; } } else { //??? an error case. No obvious correct results, so truncate. ; } i = i + 2; } } if (byteIndex > 0) { buffer.append(new String(bytes, 0, byteIndex, encoding)); } return new String(buffer); } static private void _encodeStringToStringBuilder( StringBuilder sbout, char[] cArray, String encoding, int start, BitSet dontEncodeSet, boolean lookForAnchor, boolean forHtml) throws IOException, UnsupportedEncodingException { ByteArrayOutputStream buf = null; OutputStreamWriter writer = null; char[] charArray = null; int length = cArray.length; for (int i = start; i < length; i++) { char ch = cArray[i]; // If we care about anchors and find one, abort here // and start writing out the rest of the text with _writeURL() if (lookForAnchor && ('#' == ch)) { sbout.append(ch); _writeURLtoStringBuilder(sbout, cArray, encoding, false, i + 1, true, forHtml); return; } else if (dontEncodeSet.get(ch)) { if (forHtml && (ch == '&')) { sbout.append("&"); } else { sbout.append(ch); } } else { if (buf == null) { buf = new ByteArrayOutputStream(_MAX_BYTES_PER_CHAR); if (encoding != null) writer = new OutputStreamWriter(buf, encoding); else writer = new OutputStreamWriter(buf); charArray = new char[1]; } // convert to external encoding before hex conversion try { // An inspection of OutputStreamWriter reveals // that write(char) always allocates a one element // character array. We can reuse our own. charArray[0] = ch; writer.write(charArray, 0, 1); writer.flush(); } catch(IOException e) { buf.reset(); continue; } byte[] ba = buf.toByteArray(); for (int j = 0; j < ba.length; j++) { _writeDoubleHex(sbout, ba[j] + 256); } buf.reset(); } } } static private void _encodeString( Writer out, String text, String encoding, int start, BitSet dontEncodeSet, boolean lookForAnchor, boolean forHtml) throws IOException, UnsupportedEncodingException { ByteArrayOutputStream buf = null; OutputStreamWriter writer = null; char[] charArray = null; int length = text.length(); for (int i = start; i < length; i++) { char ch = text.charAt(i); // If we care about anchors and find one, abort here // and start writing out the rest of the text with _writeURL() if (lookForAnchor && ('#' == ch)) { out.write(ch); _writeURL(out, text, encoding, false, i + 1, true, forHtml); return; } else if (dontEncodeSet.get(ch)) { if (forHtml && (ch == '&')) { out.write("&"); } else { out.write(ch); } } else { if (buf == null) { buf = new ByteArrayOutputStream(_MAX_BYTES_PER_CHAR); if (encoding != null) writer = new OutputStreamWriter(buf, encoding); else writer = new OutputStreamWriter(buf); charArray = new char[1]; } // convert to external encoding before hex conversion try { // An inspection of OutputStreamWriter reveals // that write(char) always allocates a one element // character array. We can reuse our own. charArray[0] = ch; writer.write(charArray, 0, 1); writer.flush(); } catch(IOException e) { buf.reset(); continue; } byte[] ba = buf.toByteArray(); for (int j = 0; j < ba.length; j++) { _writeDoubleHex(out, ba[j] + 256); } buf.reset(); } } } static private void _writeDoubleHex( Writer out, int i) throws IOException { out.write('%'); out.write(_getHex((i >> 4) % 0x10)); out.write(_getHex(i % 0x10)); } static private void _writeDoubleHex( StringBuilder sb, int i) throws IOException { sb.append('%'); sb.append(_getHex((i >> 4) % 0x10)); sb.append(_getHex(i % 0x10)); } static private char _getHex( int i) { if (i < 10) return ((char) ('0' + i)); else return ((char) ('A' + (i - 10))); } static private byte _fromHex(char ch) { if ((ch >= '0') && (ch <= '9')) return (byte) (ch - '0'); if ((ch >= 'a') && (ch <= 'f')) return (byte) (10 + (ch - 'a')); return (byte) (10 + (ch - 'A')); } static StringBuilder __getSharedStringBuilder(int capacity) { StringBuilder sb = _STRING_BUILDER.get(); if (sb == null) { sb = new StringBuilder(capacity); _STRING_BUILDER.set(sb); } else { sb.ensureCapacity(capacity); } // clear out the stringBuilder by setting the length to 0 sb.setLength(0); return sb; } static private final int _MAX_BYTES_PER_CHAR = 10; static private final BitSet _DONT_ENCODE_SET = new BitSet(256); // A smaller set of characters not to encode, used // for encodeString() static private final BitSet _DONT_ENCODE_SET_SMALL = new BitSet(256); // See "http://www.ietf.org/rfc/rfc2396.txt" // We're not fully along for that ride either, but we do encode // ' ' as '%20', and don't bother encoding '~' or '/' static { for (int i = 'a'; i <= 'z'; i++) { _DONT_ENCODE_SET.set(i); _DONT_ENCODE_SET_SMALL.set(i); } for (int i = 'A'; i <= 'Z'; i++) { _DONT_ENCODE_SET.set(i); _DONT_ENCODE_SET_SMALL.set(i); } for (int i = '0'; i <= '9'; i++) { _DONT_ENCODE_SET.set(i); _DONT_ENCODE_SET_SMALL.set(i); } // Don't encode '%' - we don't want to double encode anything. _DONT_ENCODE_SET.set('%'); // Ditto for '+', which is an encoded space _DONT_ENCODE_SET.set('+'); _DONT_ENCODE_SET.set('#'); _DONT_ENCODE_SET.set('&'); _DONT_ENCODE_SET.set('='); _DONT_ENCODE_SET.set('-'); _DONT_ENCODE_SET_SMALL.set('-'); _DONT_ENCODE_SET.set('_'); _DONT_ENCODE_SET_SMALL.set('_'); _DONT_ENCODE_SET.set('.'); _DONT_ENCODE_SET_SMALL.set('.'); _DONT_ENCODE_SET.set('*'); _DONT_ENCODE_SET.set('~'); _DONT_ENCODE_SET.set('/'); _DONT_ENCODE_SET_SMALL.set('/'); _DONT_ENCODE_SET.set('\''); _DONT_ENCODE_SET.set('!'); _DONT_ENCODE_SET.set('('); _DONT_ENCODE_SET.set(')'); _DONT_ENCODE_SET.set(';'); //ADFFACES-235 ";" are valid chars in querystrings } static private final ThreadLocal _STRING_BUILDER = ThreadLocalUtils.newRequestThreadLocal(); }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy