META-INF.modules.java.base.classes.java.net.URLDecoder Maven / Gradle / Ivy
Show all versions of java.base Show documentation
/*
* Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.net;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Objects;
/**
* Utility class for HTML form decoding. This class contains static methods
* for decoding a String from the application/x-www-form-urlencoded
* MIME format.
*
* The conversion process is the reverse of that used by the URLEncoder class. It is assumed
* that all characters in the encoded string are one of the following:
* "{@code a}" through "{@code z}",
* "{@code A}" through "{@code Z}",
* "{@code 0}" through "{@code 9}", and
* "{@code -}", "{@code _}",
* "{@code .}", and "{@code *}". The
* character "{@code %}" is allowed but is interpreted
* as the start of a special escaped sequence.
*
* The following rules are applied in the conversion:
*
*
* - The alphanumeric characters "{@code a}" through
* "{@code z}", "{@code A}" through
* "{@code Z}" and "{@code 0}"
* through "{@code 9}" remain the same.
*
- The special characters "{@code .}",
* "{@code -}", "{@code *}", and
* "{@code _}" remain the same.
*
- The plus sign "{@code +}" is converted into a
* space character " " .
*
- A sequence of the form "{@code %xy}" will be
* treated as representing a byte where xy is the two-digit
* hexadecimal representation of the 8 bits. Then, all substrings
* that contain one or more of these byte sequences consecutively
* will be replaced by the character(s) whose encoding would result
* in those consecutive bytes.
* The encoding scheme used to decode these characters may be specified,
* or if unspecified, the default encoding of the platform will be used.
*
*
* There are two possible ways in which this decoder could deal with
* illegal strings. It could either leave illegal characters alone or
* it could throw an {@link java.lang.IllegalArgumentException}.
* Which approach the decoder takes is left to the
* implementation.
*
* @author Mark Chamness
* @author Michael McCloskey
* @since 1.2
*/
public class URLDecoder {
// The platform default encoding
static String dfltEncName = URLEncoder.dfltEncName;
/**
* Decodes a {@code x-www-form-urlencoded} string.
* The platform's default encoding is used to determine what characters
* are represented by any consecutive sequences of the form
* "{@code %xy}".
* @param s the {@code String} to decode
* @deprecated The resulting string may vary depending on the platform's
* default encoding. Instead, use the decode(String,String) method
* to specify the encoding.
* @return the newly decoded {@code String}
*/
@Deprecated
public static String decode(String s) {
String str = null;
try {
str = decode(s, dfltEncName);
} catch (UnsupportedEncodingException e) {
// The system should always have the platform default
}
return str;
}
/**
* Decodes an {@code application/x-www-form-urlencoded} string using
* a specific encoding scheme.
*
*
* This method behaves the same as {@linkplain decode(String s, Charset charset)}
* except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
* using the given encoding name.
*
* @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
* when illegal strings are encountered.
*
* @param s the {@code String} to decode
* @param enc The name of a supported
* character
* encoding.
* @return the newly decoded {@code String}
* @throws UnsupportedEncodingException
* If character encoding needs to be consulted, but
* named character encoding is not supported
* @see URLEncoder#encode(java.lang.String, java.lang.String)
* @since 1.4
*/
public static String decode(String s, String enc) throws UnsupportedEncodingException {
if (enc.length() == 0) {
throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
}
try {
Charset charset = Charset.forName(enc);
return decode(s, charset);
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
throw new UnsupportedEncodingException(enc);
}
}
/**
* Decodes an {@code application/x-www-form-urlencoded} string using
* a specific {@linkplain java.nio.charset.Charset Charset}.
* The supplied charset is used to determine
* what characters are represented by any consecutive sequences of the
* form "{@code %xy}".
*
* Note: The
* World Wide Web Consortium Recommendation states that
* UTF-8 should be used. Not doing so may introduce
* incompatibilities.
*
* @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
* when illegal strings are encountered.
*
* @param s the {@code String} to decode
* @param charset the given charset
* @return the newly decoded {@code String}
* @throws NullPointerException if {@code s} or {@code charset} is {@code null}
* @throws IllegalArgumentException if the implementation encounters illegal
* characters
* @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
* @since 10
*/
public static String decode(String s, Charset charset) {
Objects.requireNonNull(charset, "Charset");
boolean needToChange = false;
int numChars = s.length();
StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
int i = 0;
char c;
byte[] bytes = null;
while (i < numChars) {
c = s.charAt(i);
switch (c) {
case '+':
sb.append(' ');
i++;
needToChange = true;
break;
case '%':
/*
* Starting with this instance of %, process all
* consecutive substrings of the form %xy. Each
* substring %xy will yield a byte. Convert all
* consecutive bytes obtained this way to whatever
* character(s) they represent in the provided
* encoding.
*/
try {
// (numChars-i)/3 is an upper bound for the number
// of remaining bytes
if (bytes == null)
bytes = new byte[(numChars-i)/3];
int pos = 0;
while ( ((i+2) < numChars) &&
(c=='%')) {
int v = Integer.parseInt(s, i + 1, i + 3, 16);
if (v < 0)
throw new IllegalArgumentException(
"URLDecoder: Illegal hex characters in escape "
+ "(%) pattern - negative value");
bytes[pos++] = (byte) v;
i+= 3;
if (i < numChars)
c = s.charAt(i);
}
// A trailing, incomplete byte encoding such as
// "%x" will cause an exception to be thrown
if ((i < numChars) && (c=='%'))
throw new IllegalArgumentException(
"URLDecoder: Incomplete trailing escape (%) pattern");
sb.append(new String(bytes, 0, pos, charset));
} catch (NumberFormatException e) {
throw new IllegalArgumentException(
"URLDecoder: Illegal hex characters in escape (%) pattern - "
+ e.getMessage());
}
needToChange = true;
break;
default:
sb.append(c);
i++;
break;
}
}
return (needToChange? sb.toString() : s);
}
}