All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ch.sentric.PercentCodec Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2013 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ch.sentric;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;

/**
 * Percent-encoding, also known as URL encoding, is a mechanism for encoding
 * information in a Uniform Resource Identifier (URI) under certain
 * circumstances. Although it is known as URL encoding it is, in fact, used more
 * generally within the main Uniform Resource Identifier (URI) set, which
 * includes both Uniform Resource Locator (URL) and Uniform Resource Name (URN).
 * As such it is also used in the preparation of data of the
 * "application/x-www-form-urlencoded" media type, as is often used in the
 * submission of HTML form data in HTTP requests.
 * <p>
 * See http://en.wikipedia.org/wiki/Percent-encoding.
 * <p>
 * Some of this code has been copied from the bixo project
 * src/main/java/bixo/urldb/SimpleUrlNormalizer.java.
 * <p>
 * Characters outside the ASCII range are always converted to and from their
 * UTF-8 byte sequence, independent of the platform default charset, so the
 * same input produces the same output on every host.
 * <p>
 * Known quirks of this codec:
 * <ul>
 * <li>A space is encoded as {@code '+'}, but a literal {@code '+'} is only
 * escaped when the caller lists it in the reserved characters. Since
 * {@link #decode(String)} turns every {@code '+'} into a space, a literal
 * {@code '+'} does not survive an encode/decode round trip.</li>
 * <li>Escape sequences are emitted with lowercase hex digits.</li>
 * </ul>
 */
public class PercentCodec {

    // Reserved characters and their percent-encoded forms:
    // !...*...'...(...)...;...:...@...&...=...+...$...,.../...?...%...#...[...]
    // %21 %2A %27 %28 %29 %3B %3A %40 %26 %3D %2B %24 %2C %2F %3F %25 %23 %5B %5D

    // You only need to encode "reserved purpose" characters, and that sub-set
    // of the reserved chars varies depending upon the protocol and the
    // component. We are only really worried about normalizing http(s) URLs.
    // Not really sure about ':' and '?' being reserved in queries.
    private static final String RESERVED_QUERY_CHARS = "%&;=:?#";
    private static final String RESERVED_PATH_CHARS = "%/?#";
    private static final String HEX_CODES = "0123456789abcdefABCDEF";

    // Fixed charset for turning code points into bytes and back; using the
    // platform default would make the escapes differ between machines.
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Encodes one path segment, escaping the characters {@code % / ? #}.
     *
     * @param pathPart
     *            the raw (decoded) path segment, must not be {@code null}
     * @return the percent-encoded path segment
     */
    public String encodePathPart(final String pathPart) {
        return encode(pathPart, RESERVED_PATH_CHARS);
    }

    /**
     * Encodes one query component (a key or a value), escaping the characters
     * {@code % & ; = : ? #}.
     *
     * @param queryComponent
     *            the raw (decoded) query component, must not be {@code null}
     * @return the percent-encoded query component
     */
    public String encodeQueryComponent(final String queryComponent) {
        return encode(queryComponent, RESERVED_QUERY_CHARS);
    }

    /**
     * Percent-encodes a URL component code point by code point.
     * <ul>
     * <li>A space becomes {@code '+'}.</li>
     * <li>Code points at or above U+007F are written as the percent-escaped
     * bytes of their UTF-8 encoding.</li>
     * <li>Control characters below U+0020 and every character contained in
     * {@code reservedChars} are written as a single {@code %xx} escape.</li>
     * <li>Everything else is copied unchanged.</li>
     * </ul>
     *
     * @param component
     *            the raw (decoded) text, must not be {@code null}
     * @param reservedChars
     *            the ASCII characters that must be escaped in this component,
     *            must not be {@code null}
     * @return the encoded component
     */
    public String encode(final String component, final String reservedChars) {
        final StringBuilder result = new StringBuilder(component.length());
        // Step by code point, not by char, so surrogate pairs stay together.
        for (int i = 0; i < component.length();) {
            final int codePoint = component.codePointAt(i);
            if (codePoint == 0x0020) {
                result.append('+');
            } else if (codePoint >= 0x007F) {
                result.append(encodeCodePoint(codePoint));
            } else if ((codePoint < 0x0020) || (reservedChars.indexOf(codePoint) != -1)) {
                result.append(String.format("%%%02x", codePoint));
            } else {
                result.append((char) codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * Decodes a percent-encoded string. Escaped bytes are interpreted as
     * UTF-8 and {@code '+'} is turned into a space. A {@code '%'} that is not
     * followed by two hex digits is treated as a literal percent sign instead
     * of being rejected.
     *
     * @param url
     *            the encoded text, must not be {@code null}
     * @return the decoded text
     * @throws IllegalStateException
     *             if the runtime reports UTF-8 as unsupported
     */
    public String decode(final String url) {
        // FUTURE - handle unsupported %uHHHH sequences for Unicode code points.
        // FUTURE - detect & handle incorrectly encoded URLs

        // First, try to catch unescaped '%' characters; URLDecoder would
        // otherwise throw on them.
        final String result = escapeIsolatedPercentSigns(url);
        try {
            return URLDecoder.decode(result, UTF_8.name());
        } catch (final UnsupportedEncodingException e) {
            throw new IllegalStateException("Unexpected exception during URL decoding", e);
        }
    }

    /**
     * Returns the percent-escaped UTF-8 bytes of a single code point, e.g.
     * {@code "%c3%a9"} for U+00E9.
     */
    private static String encodeCodePoint(final int codepoint) {
        final int[] codepoints = { codepoint };
        final byte[] bytes = new String(codepoints, 0, 1).getBytes(UTF_8);
        final StringBuilder result = new StringBuilder(bytes.length * 3);
        for (final byte value : bytes) {
            // A byte argument is formatted by %x as its unsigned 8-bit value.
            result.append(String.format("%%%02x", value));
        }
        return result.toString();
    }

    /**
     * Rewrites every {@code '%'} that does not start a valid {@code %HH}
     * escape as {@code "%25"}. Returns the input instance itself when nothing
     * had to be changed.
     */
    private static String escapeIsolatedPercentSigns(final String in) {
        StringBuilder builder = null; // created lazily on the first fix-up
        int offset = 0;
        int lastOffset = 0;
        while ((offset = in.indexOf('%', offset)) != -1) {
            offset += 1; // now points at the character right after the '%'
            if (offset > in.length() - 2 || HEX_CODES.indexOf(in.charAt(offset)) == -1
                    || HEX_CODES.indexOf(in.charAt(offset + 1)) == -1) {
                if (null == builder) {
                    builder = new StringBuilder(in.length() + 2);
                }
                // Copy everything up to and including the '%', then add "25".
                builder.append(in, lastOffset, offset);
                builder.append("25");
                lastOffset = offset;
            }
        }
        if (null != builder) {
            builder.append(in, lastOffset, in.length());
            return builder.toString();
        }
        return in;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy