io.netty5.handler.codec.http.QueryStringEncoder Maven / Gradle / Ivy
/*
* Copyright 2012 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty5.handler.codec.http;
import io.netty5.util.internal.StringUtil;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
/**
* Creates a URL-encoded URI from a path string and key-value parameter pairs.
* This encoder is for one time use only. Create a new instance for each URI.
*
*
* {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
* encoder.addParam("recipient", "world");
* assert encoder.toString().equals("/hello?recipient=world");
*
*
* @see QueryStringDecoder
*/
public class QueryStringEncoder {
private final Charset charset;
private final StringBuilder uriBuilder;
private boolean hasParams;
private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
/**
* Creates a new encoder that encodes a URI that starts with the specified
* path string. The encoder will encode the URI in UTF-8.
*/
public QueryStringEncoder(String uri) {
this(uri, HttpConstants.DEFAULT_CHARSET);
}
/**
* Creates a new encoder that encodes a URI that starts with the specified
* path string in the specified charset.
*/
public QueryStringEncoder(String uri, Charset charset) {
requireNonNull(charset, "charset");
uriBuilder = new StringBuilder(uri);
this.charset = UTF_8.equals(charset) ? null : charset;
}
/**
* Adds a parameter with the specified name and value to this encoder.
*/
public void addParam(String name, String value) {
requireNonNull(name, "name");
if (hasParams) {
uriBuilder.append('&');
} else {
uriBuilder.append('?');
hasParams = true;
}
encodeComponent(name);
if (value != null) {
uriBuilder.append('=');
encodeComponent(value);
}
}
private void encodeComponent(CharSequence s) {
if (charset == null) {
encodeUtf8Component(s);
} else {
encodeNonUtf8Component(s);
}
}
/**
* Returns the URL-encoded URI object which was created from the path string
* specified in the constructor and the parameters added by
* {@link #addParam(String, String)} method.
*/
public URI toUri() throws URISyntaxException {
return new URI(toString());
}
/**
* Returns the URL-encoded URI which was created from the path string
* specified in the constructor and the parameters added by
* {@link #addParam(String, String)} method.
*/
@Override
public String toString() {
return uriBuilder.toString();
}
/**
* Encode the String as per RFC 3986, Section 2.
*
* There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}.
* The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20}
* beyond that, this method reuse the {@link #uriBuilder} in this class rather than create a new one,
* thus generates less garbage for the GC.
*
* @param s The String to encode
*/
private void encodeNonUtf8Component(CharSequence s) {
//Don't allocate memory until needed
char[] buf = null;
for (int i = 0, len = s.length(); i < len;) {
char c = s.charAt(i);
if (dontNeedEncoding(c)) {
uriBuilder.append(c);
i++;
} else {
int index = 0;
if (buf == null) {
buf = new char[s.length() - i];
}
do {
buf[index] = c;
index++;
i++;
} while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
byte[] bytes = new String(buf, 0, index).getBytes(charset);
for (byte b : bytes) {
appendEncoded(b);
}
}
}
}
private void encodeUtf8Component(CharSequence s) {
for (int i = 0, len = s.length(); i < len; i++) {
char c = s.charAt(i);
if (!dontNeedEncoding(c)) {
encodeUtf8Component(s, i, len);
return;
}
}
uriBuilder.append(s);
}
private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
if (encodingStart > 0) {
// Append non-encoded characters directly first.
uriBuilder.append(s, 0, encodingStart);
}
encodeUtf8ComponentSlow(s, encodingStart, len);
}
private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
for (int i = start; i < len; i++) {
char c = s.charAt(i);
if (c < 0x80) {
if (dontNeedEncoding(c)) {
uriBuilder.append(c);
} else {
appendEncoded(c);
}
} else if (c < 0x800) {
appendEncoded(0xc0 | c >> 6);
appendEncoded(0x80 | c & 0x3f);
} else if (StringUtil.isSurrogate(c)) {
if (!Character.isHighSurrogate(c)) {
appendEncoded(WRITE_UTF_UNKNOWN);
continue;
}
// Surrogate Pair consumes 2 characters.
if (++i == s.length()) {
appendEncoded(WRITE_UTF_UNKNOWN);
break;
}
// Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
writeUtf8Surrogate(c, s.charAt(i));
} else {
appendEncoded(0xe0 | c >> 12);
appendEncoded(0x80 | c >> 6 & 0x3f);
appendEncoded(0x80 | c & 0x3f);
}
}
}
private void writeUtf8Surrogate(char c, char c2) {
if (!Character.isLowSurrogate(c2)) {
appendEncoded(WRITE_UTF_UNKNOWN);
appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
return;
}
int codePoint = Character.toCodePoint(c, c2);
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
appendEncoded(0xf0 | codePoint >> 18);
appendEncoded(0x80 | codePoint >> 12 & 0x3f);
appendEncoded(0x80 | codePoint >> 6 & 0x3f);
appendEncoded(0x80 | codePoint & 0x3f);
}
private void appendEncoded(int b) {
uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
}
/**
* Convert the given digit to an upper hexadecimal char.
*
* @param digit the number to convert to a character.
* @return the {@code char} representation of the specified digit
* in hexadecimal.
*/
private static char forDigit(int digit) {
return CHAR_MAP[digit & 0xF];
}
/**
* Determines whether the given character is an unreserved character.
*
* unreserved characters do not need to be encoded, and include uppercase and lowercase
* letters, decimal digits, hyphen, period, underscore, and tilde.
*
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*"
*
* @param ch the char to be judged whether it need to be encoded
* @return true or false
*/
private static boolean dontNeedEncoding(char ch) {
return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
|| ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~';
}
}