All downloads are free. Search and download functionality uses the official Maven repository.

io.netty5.handler.codec.http.QueryStringEncoder Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty5.handler.codec.http;

import io.netty5.util.internal.StringUtil;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

/**
 * Creates a URL-encoded URI from a path string and key-value parameter pairs.
 * This encoder is for one time use only.  Create a new instance for each URI.
 *
 * 
 * {@link QueryStringEncoder} encoder = new {@link QueryStringEncoder}("/hello");
 * encoder.addParam("recipient", "world");
 * assert encoder.toString().equals("/hello?recipient=world");
 * 
* * @see QueryStringDecoder */ public class QueryStringEncoder { private final Charset charset; private final StringBuilder uriBuilder; private boolean hasParams; private static final byte WRITE_UTF_UNKNOWN = (byte) '?'; private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray(); /** * Creates a new encoder that encodes a URI that starts with the specified * path string. The encoder will encode the URI in UTF-8. */ public QueryStringEncoder(String uri) { this(uri, HttpConstants.DEFAULT_CHARSET); } /** * Creates a new encoder that encodes a URI that starts with the specified * path string in the specified charset. */ public QueryStringEncoder(String uri, Charset charset) { requireNonNull(charset, "charset"); uriBuilder = new StringBuilder(uri); this.charset = UTF_8.equals(charset) ? null : charset; } /** * Adds a parameter with the specified name and value to this encoder. */ public void addParam(String name, String value) { requireNonNull(name, "name"); if (hasParams) { uriBuilder.append('&'); } else { uriBuilder.append('?'); hasParams = true; } encodeComponent(name); if (value != null) { uriBuilder.append('='); encodeComponent(value); } } private void encodeComponent(CharSequence s) { if (charset == null) { encodeUtf8Component(s); } else { encodeNonUtf8Component(s); } } /** * Returns the URL-encoded URI object which was created from the path string * specified in the constructor and the parameters added by * {@link #addParam(String, String)} method. */ public URI toUri() throws URISyntaxException { return new URI(toString()); } /** * Returns the URL-encoded URI which was created from the path string * specified in the constructor and the parameters added by * {@link #addParam(String, String)} method. */ @Override public String toString() { return uriBuilder.toString(); } /** * Encode the String as per RFC 3986, Section 2. *

* There is a little different between the JDK's encode method : {@link URLEncoder#encode(String, String)}. * The JDK's encoder encode the space to {@code +} and this method directly encode the blank to {@code %20} * beyond that, this method reuse the {@link #uriBuilder} in this class rather than create a new one, * thus generates less garbage for the GC. * * @param s The String to encode */ private void encodeNonUtf8Component(CharSequence s) { //Don't allocate memory until needed char[] buf = null; for (int i = 0, len = s.length(); i < len;) { char c = s.charAt(i); if (dontNeedEncoding(c)) { uriBuilder.append(c); i++; } else { int index = 0; if (buf == null) { buf = new char[s.length() - i]; } do { buf[index] = c; index++; i++; } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i))); byte[] bytes = new String(buf, 0, index).getBytes(charset); for (byte b : bytes) { appendEncoded(b); } } } } private void encodeUtf8Component(CharSequence s) { for (int i = 0, len = s.length(); i < len; i++) { char c = s.charAt(i); if (!dontNeedEncoding(c)) { encodeUtf8Component(s, i, len); return; } } uriBuilder.append(s); } private void encodeUtf8Component(CharSequence s, int encodingStart, int len) { if (encodingStart > 0) { // Append non-encoded characters directly first. uriBuilder.append(s, 0, encodingStart); } encodeUtf8ComponentSlow(s, encodingStart, len); } private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) { for (int i = start; i < len; i++) { char c = s.charAt(i); if (c < 0x80) { if (dontNeedEncoding(c)) { uriBuilder.append(c); } else { appendEncoded(c); } } else if (c < 0x800) { appendEncoded(0xc0 | c >> 6); appendEncoded(0x80 | c & 0x3f); } else if (StringUtil.isSurrogate(c)) { if (!Character.isHighSurrogate(c)) { appendEncoded(WRITE_UTF_UNKNOWN); continue; } // Surrogate Pair consumes 2 characters. 
if (++i == s.length()) { appendEncoded(WRITE_UTF_UNKNOWN); break; } // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path. writeUtf8Surrogate(c, s.charAt(i)); } else { appendEncoded(0xe0 | c >> 12); appendEncoded(0x80 | c >> 6 & 0x3f); appendEncoded(0x80 | c & 0x3f); } } } private void writeUtf8Surrogate(char c, char c2) { if (!Character.isLowSurrogate(c2)) { appendEncoded(WRITE_UTF_UNKNOWN); appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2); return; } int codePoint = Character.toCodePoint(c, c2); // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630. appendEncoded(0xf0 | codePoint >> 18); appendEncoded(0x80 | codePoint >> 12 & 0x3f); appendEncoded(0x80 | codePoint >> 6 & 0x3f); appendEncoded(0x80 | codePoint & 0x3f); } private void appendEncoded(int b) { uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b)); } /** * Convert the given digit to an upper hexadecimal char. * * @param digit the number to convert to a character. * @return the {@code char} representation of the specified digit * in hexadecimal. */ private static char forDigit(int digit) { return CHAR_MAP[digit & 0xF]; } /** * Determines whether the given character is an unreserved character. *

* unreserved characters do not need to be encoded, and include uppercase and lowercase * letters, decimal digits, hyphen, period, underscore, and tilde. *

* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / "*" * * @param ch the char to be judged whether it need to be encoded * @return true or false */ private static boolean dontNeedEncoding(char ch) { return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' || ch == '-' || ch == '_' || ch == '.' || ch == '*' || ch == '~'; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy