All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cn.hutool.core.net.URLEncoder Maven / Gradle / Ivy

There is a newer version: 5.8.33
Show newest version
package cn.hutool.core.net;

import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.HexUtil;
import cn.hutool.core.util.StrUtil;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.BitSet;

/**
 * URL编码,数据内容的类型是 application/x-www-form-urlencoded。
 * TODO 6.x移除此类,使用PercentCodec代替(无法很好区分URL编码和www-form编码)
 *
 * 
 * 1.字符"a"-"z","A"-"Z","0"-"9",".","-","*",和"_" 都不会被编码;
 * 2.将空格转换为%20 ;
 * 3.将非文本内容转换成"%xy"的形式,xy是两位16进制的数值;
 * 
* * @author looly * @see cn.hutool.core.codec.PercentCodec * @deprecated 此类中的方法并不规范,请使用 {@link RFC3986} */ @Deprecated public class URLEncoder implements Serializable { private static final long serialVersionUID = 1L; // --------------------------------------------------------------------------------------------- Static method start /** * 默认URLEncoder
* 默认的编码器针对URI路径编码,定义如下: * *
	 * default = pchar / "/"
	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
*/ public static final URLEncoder DEFAULT = createDefault(); /** * URL的Path的每一个Segment URLEncoder
* 默认的编码器针对URI路径编码,定义如下: * *
	 * pchar = unreserved / pct-encoded / sub-delims / ":"(非空segment不包含:) / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
* * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 */ public static final URLEncoder PATH_SEGMENT = createPathSegment(); /** * URL的Fragment URLEncoder
* 默认的编码器针对Fragment,定义如下: * *
	 * fragment    = *( pchar / "/" / "?" )
	 * pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
	 * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
* * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 * @since 5.7.13 */ public static final URLEncoder FRAGMENT = createFragment(); /** * 用于查询语句的URLEncoder
* 编码器针对URI路径编码,定义如下: * *
	 * 0x20 ' ' =》 '+'
	 * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
	 * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' Also '=' and '&' 不编码
	 * 其它编码为 %nn 形式
	 * 
*

* 详细见:https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm */ public static final URLEncoder QUERY = createQuery(); /** * 全编码的URLEncoder
*

	 *  0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
	 *  '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' 不编码
	 *  其它编码为 %nn 形式
	 * 
*/ public static final URLEncoder ALL = createAll(); /** * 创建默认URLEncoder
* 默认的编码器针对URI路径编码,定义如下: * *
	 * default = pchar / "/"
	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
* * @return URLEncoder */ public static URLEncoder createDefault() { final URLEncoder encoder = new URLEncoder(); encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('~'); // Add the sub-delims addSubDelims(encoder); // Add the remaining literals encoder.addSafeCharacter(':'); encoder.addSafeCharacter('@'); // Add '/' so it isn't encoded when we encode a path encoder.addSafeCharacter('/'); return encoder; } /** * URL的Path的每一个Segment URLEncoder
* 默认的编码器针对URI路径的每一段编码,定义如下: * *
	 * pchar = unreserved / pct-encoded / sub-delims / ":"(非空segment不包含:) / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
* * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 * * @return URLEncoder */ public static URLEncoder createPathSegment() { final URLEncoder encoder = new URLEncoder(); // unreserved encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('~'); // Add the sub-delims addSubDelims(encoder); // Add the remaining literals //non-zero-length segment without any colon ":" //encoder.addSafeCharacter(':'); encoder.addSafeCharacter('@'); return encoder; } /** * URL的Fragment URLEncoder
* 默认的编码器针对Fragment,定义如下: * *
	 * fragment    = *( pchar / "/" / "?" )
	 * pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
	 * unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * 
* * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 * * @return URLEncoder * @since 5.7.13 */ public static URLEncoder createFragment() { final URLEncoder encoder = new URLEncoder(); encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('~'); // Add the sub-delims addSubDelims(encoder); // Add the remaining literals encoder.addSafeCharacter(':'); encoder.addSafeCharacter('@'); encoder.addSafeCharacter('/'); encoder.addSafeCharacter('?'); return encoder; } /** * 创建用于查询语句的URLEncoder
* 编码器针对URI路径编码,定义如下: * *
	 * 0x20 ' ' =》 '+'
	 * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
	 * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' Also '=' and '&' 不编码
	 * 其它编码为 %nn 形式
	 * 
*

* 详细见:https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm * * @return URLEncoder */ public static URLEncoder createQuery() { final URLEncoder encoder = new URLEncoder(); // Special encoding for space encoder.setEncodeSpaceAsPlus(true); // Alpha and digit are safe by default // Add the other permitted characters encoder.addSafeCharacter('*'); encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('='); encoder.addSafeCharacter('&'); return encoder; } /** * 创建URLEncoder
* 编码器针对URI路径编码,定义如下: * *

	 * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
	 * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' 不编码
	 * 其它编码为 %nn 形式
	 * 
*

* 详细见:https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm * * @return URLEncoder */ public static URLEncoder createAll() { final URLEncoder encoder = new URLEncoder(); encoder.addSafeCharacter('*'); encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); return encoder; } // --------------------------------------------------------------------------------------------- Static method end /** * 存放安全编码 */ private final BitSet safeCharacters; /** * 是否编码空格为+ */ private boolean encodeSpaceAsPlus = false; /** * 构造
* [a-zA-Z0-9]默认不被编码 */ public URLEncoder() { this(new BitSet(256)); // unreserved addAlpha(); addDigit(); } /** * 构造 * * @param safeCharacters 安全字符,安全字符不被编码 */ private URLEncoder(BitSet safeCharacters) { this.safeCharacters = safeCharacters; } /** * 增加安全字符
* 安全字符不被编码 * * @param c 字符 */ public void addSafeCharacter(char c) { safeCharacters.set(c); } /** * 移除安全字符
* 安全字符不被编码 * * @param c 字符 */ public void removeSafeCharacter(char c) { safeCharacters.clear(c); } /** * 是否将空格编码为+ * * @param encodeSpaceAsPlus 是否将空格编码为+ */ public void setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) { this.encodeSpaceAsPlus = encodeSpaceAsPlus; } /** * 将URL中的字符串编码为%形式 * * @param path 需要编码的字符串 * @param charset 编码, {@code null}返回原字符串,表示不编码 * @return 编码后的字符串 */ public String encode(String path, Charset charset) { if (null == charset || StrUtil.isEmpty(path)) { return path; } final StringBuilder rewrittenPath = new StringBuilder(path.length()); ByteArrayOutputStream buf = new ByteArrayOutputStream(); OutputStreamWriter writer = new OutputStreamWriter(buf, charset); int c; for (int i = 0; i < path.length(); i++) { c = path.charAt(i); if (safeCharacters.get(c)) { rewrittenPath.append((char) c); } else if (encodeSpaceAsPlus && c == CharUtil.SPACE) { // 对于空格单独处理 rewrittenPath.append('+'); } else { // convert to external encoding before hex conversion try { writer.write((char) c); writer.flush(); } catch (IOException e) { buf.reset(); continue; } byte[] ba = buf.toByteArray(); for (byte toEncode : ba) { // Converting each byte in the buffer rewrittenPath.append('%'); HexUtil.appendHex(rewrittenPath, toEncode, false); } buf.reset(); } } return rewrittenPath.toString(); } /** * 增加安全字符[a-z][A-Z] */ private void addAlpha() { for (char i = 'a'; i <= 'z'; i++) { addSafeCharacter(i); } for (char i = 'A'; i <= 'Z'; i++) { addSafeCharacter(i); } } /** * 增加数字1-9 */ private void addDigit() { for (char i = '0'; i <= '9'; i++) { addSafeCharacter(i); } } /** * 增加sub-delims
* sub-delims = "!" / "$" / "&" / "'" / "(" / ") / "*" / "+" / "," / ";" / "=" * 定义见:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 */ private static void addSubDelims(URLEncoder encoder){ // Add the sub-delims encoder.addSafeCharacter('!'); encoder.addSafeCharacter('$'); encoder.addSafeCharacter('&'); encoder.addSafeCharacter('\''); encoder.addSafeCharacter('('); encoder.addSafeCharacter(')'); encoder.addSafeCharacter('*'); encoder.addSafeCharacter('+'); encoder.addSafeCharacter(','); encoder.addSafeCharacter(';'); encoder.addSafeCharacter('='); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy