com.linecorp.armeria.internal.PathAndQuery Maven / Gradle / Ivy
Show all versions of armeria-shaded Show documentation
/*
* Copyright 2017 LINE Corporation
*
* LINE Corporation licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.linecorp.armeria.internal;
import static io.netty.util.internal.StringUtil.decodeHexNibble;
import static java.util.Objects.requireNonNull;
import java.util.BitSet;
import java.util.Objects;
import java.util.Set;
import javax.annotation.Nullable;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.linecorp.armeria.common.Flags;
import com.linecorp.armeria.common.metric.MeterIdPrefix;
import com.linecorp.armeria.internal.metric.CaffeineMetricSupport;
import io.micrometer.core.instrument.MeterRegistry;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
/**
 * A parser of the raw path and query components of an HTTP path. Performs validation and allows caching of
 * results.
 *
 * <p>The raw value is split at the first {@code '?'}, each part is percent-decoded into UTF-8 bytes while
 * being validated, and the bytes are then percent-encoded again into a normalized form. A {@code null}
 * result from {@link #parse(String)} means the input was rejected.
 */
public final class PathAndQuery {

    /** The result returned when the raw path is {@code null}. */
    private static final PathAndQuery ROOT_PATH_QUERY = new PathAndQuery("/", null);

    /** Byte values that are written as-is when a path is encoded back into a {@link String}. */
    private static final BitSet ALLOWED_PATH_CHARS = new BitSet();

    /**
     * Byte values that are written as-is when a query is encoded back into a {@link String}.
     * Unlike {@link #ALLOWED_PATH_CHARS}, this set does not contain {@code '+'}.
     */
    private static final BitSet ALLOWED_QUERY_CHARS = new BitSet();

    static {
        final String allowedPathChars =
                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=";
        for (int i = 0; i < allowedPathChars.length(); i++) {
            ALLOWED_PATH_CHARS.set(allowedPathChars.charAt(i));
        }

        final String allowedQueryChars =
                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*,;=";
        for (int i = 0; i < allowedQueryChars.length(); i++) {
            ALLOWED_QUERY_CHARS.set(allowedQueryChars.charAt(i));
        }
    }

    /** Shared, never-mutated result for an empty query part. */
    private static final Bytes EMPTY_QUERY = new Bytes(0);

    /** Shared, never-mutated result for an empty path part. */
    private static final Bytes ROOT_PATH = new Bytes(new byte[] { '/' });

    /**
     * The cache of parse results keyed by the raw path, or {@code null} if
     * {@link Flags#parsedPathCacheSpec()} is empty.
     */
    @Nullable
    private static final Cache<String, PathAndQuery> CACHE =
            Flags.parsedPathCacheSpec().map(PathAndQuery::buildCache).orElse(null);

    /**
     * Builds the parse result cache from the specified Caffeine specification string.
     */
    private static Cache<String, PathAndQuery> buildCache(String spec) {
        return Caffeine.from(spec).build();
    }

    /**
     * Registers the metrics of the parse result cache to the specified {@link MeterRegistry}.
     * Does nothing if the cache is disabled.
     */
    public static void registerMetrics(MeterRegistry registry, MeterIdPrefix idPrefix) {
        if (CACHE != null) {
            CaffeineMetricSupport.setup(registry, idPrefix, CACHE);
        }
    }

    /**
     * Clears the currently cached parsed paths. Only for use in tests.
     *
     * @throws NullPointerException if the cache is disabled
     */
    @VisibleForTesting
    public static void clearCachedPaths() {
        requireNonNull(CACHE, "CACHE");
        CACHE.asMap().clear();
    }

    /**
     * Returns paths that have had their parse result cached. Only for use in tests.
     *
     * @throws NullPointerException if the cache is disabled
     */
    @VisibleForTesting
    public static Set<String> cachedPaths() {
        requireNonNull(CACHE, "CACHE");
        return CACHE.asMap().keySet();
    }

    /**
     * Validates the {@link String} that contains an absolute path and a query, and splits them into
     * the path part and the query part. If the path is usable (e.g., can be served a successful response from
     * the server and doesn't have variable path parameters), {@link PathAndQuery#storeInCache(String)} should
     * be called to cache the parsing result for faster future invocations.
     *
     * @return a {@link PathAndQuery} with the absolute path and query, or {@code null} if the specified
     *         {@link String} is not an absolute path or invalid.
     */
    @Nullable
    public static PathAndQuery parse(@Nullable String rawPath) {
        if (CACHE != null && rawPath != null) {
            final PathAndQuery parsed = CACHE.getIfPresent(rawPath);
            if (parsed != null) {
                return parsed;
            }
        }
        return splitPathAndQuery(rawPath);
    }

    /**
     * Stores this {@link PathAndQuery} into cache for the given raw path. This should be used by callers when
     * the parsed result was valid (e.g., when a server is able to successfully handle the parsed path).
     * Does nothing if the cache is disabled or {@code rawPath} is {@code null}.
     */
    public void storeInCache(@Nullable String rawPath) {
        if (CACHE != null && rawPath != null) {
            CACHE.put(rawPath, this);
        }
    }

    private final String path;
    @Nullable
    private final String query;

    private PathAndQuery(String path, @Nullable String query) {
        this.path = path;
        this.query = query;
    }

    /**
     * Returns the normalized absolute path.
     */
    public String path() {
        return path;
    }

    /**
     * Returns the normalized query, or {@code null} if the raw path did not contain {@code '?'}.
     */
    @Nullable
    public String query() {
        return query;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof PathAndQuery)) {
            return false;
        }
        final PathAndQuery that = (PathAndQuery) o;
        return Objects.equals(path, that.path) &&
               Objects.equals(query, that.query);
    }

    @Override
    public int hashCode() {
        return Objects.hash(path, query);
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
                          .add("path", path)
                          .add("query", query)
                          .toString();
    }

    /**
     * Splits the specified value at the first {@code '?'}, validates and normalizes both parts.
     *
     * @return the parse result, the root path result if {@code pathAndQuery} is {@code null}, or
     *         {@code null} if either part is invalid, the path is not absolute, the first path component
     *         contains a colon or the path contains a {@code ".."} component.
     */
    @Nullable
    private static PathAndQuery splitPathAndQuery(@Nullable final String pathAndQuery) {
        final Bytes path;
        final Bytes query;

        if (pathAndQuery == null) {
            return ROOT_PATH_QUERY;
        }

        // Split by the first '?'.
        final int queryPos = pathAndQuery.indexOf('?');
        if (queryPos >= 0) {
            if ((path = decodePercentsAndEncodeToUtf8(
                    pathAndQuery, 0, queryPos, true)) == null) {
                return null;
            }
            if ((query = decodePercentsAndEncodeToUtf8(
                    pathAndQuery, queryPos + 1, pathAndQuery.length(), false)) == null) {
                return null;
            }
        } else {
            if ((path = decodePercentsAndEncodeToUtf8(
                    pathAndQuery, 0, pathAndQuery.length(), true)) == null) {
                return null;
            }
            query = null;
        }

        if (path.data[0] != '/') {
            // Do not accept a relative path.
            return null;
        }

        // Reject the prohibited patterns.
        if (firstPathComponentContainsColon(path) || pathContainsDoubleDots(path)) {
            return null;
        }

        return new PathAndQuery(encodeToPercents(path, true),
                                query != null ? encodeToPercents(query, false) : null);
    }

    /**
     * Decodes the percent-encoded octets in {@code value[start, end)} and encodes everything else into
     * UTF-8, validating each byte with {@link #appendOneByte(Bytes, int, boolean, boolean)}.
     *
     * <p>In a query, {@code "%26"}, {@code "%3B"} and {@code "%3D"} are stored as {@code '&'}, {@code ';'}
     * and {@code '='} flagged via {@link Bytes#addPercentEncoded(byte)} so that they are written back
     * percent-encoded, and a raw {@code '+'} is stored as a space.
     *
     * @return the decoded bytes, or {@code null} if a {@code '%'} is not followed by two hexadecimal digits
     *         or a prohibited control character was found.
     */
    @Nullable
    private static Bytes decodePercentsAndEncodeToUtf8(String value, int start, int end, boolean isPath) {
        final int length = end - start;
        if (length == 0) {
            return isPath ? ROOT_PATH : EMPTY_QUERY;
        }

        final Bytes buf = new Bytes(Math.max(length * 3 / 2, 4));
        boolean wasSlash = false;
        for (final CodePointIterator i = new CodePointIterator(value, start, end); i.hasNextCodePoint();) {
            final int pos = i.position();
            final int cp = i.nextCodePoint();

            if (cp == '%') {
                final int hexEnd = pos + 3;
                if (hexEnd > end) {
                    // '%' or '%x' (must be followed by two hexadigits)
                    return null;
                }

                final int digit1 = decodeHexNibble(value.charAt(pos + 1));
                final int digit2 = decodeHexNibble(value.charAt(pos + 2));
                if (digit1 < 0 || digit2 < 0) {
                    // The first or second digit is not hexadecimal.
                    return null;
                }

                final int decoded = (digit1 << 4) | digit2;
                if (!isPath && (decoded == '&' || decoded == ';' || decoded == '=')) {
                    // Remember that this delimiter was percent-encoded so we can distinguish e.g. '&' and
                    // '%26' in a query string. It is written back as '%26', '%3B' or '%3D' later.
                    // The flag is kept out of band rather than as a reserved byte value, because any byte
                    // value, including 0xFD..0xFF, can legitimately appear as a percent-decoded octet.
                    buf.addPercentEncoded((byte) decoded);
                    wasSlash = false;
                } else if (appendOneByte(buf, decoded, wasSlash, isPath)) {
                    wasSlash = decoded == '/';
                } else {
                    return null;
                }

                i.position(hexEnd);
                continue;
            }

            if (cp == '+' && !isPath) {
                buf.ensure(1);
                buf.add((byte) ' ');
                wasSlash = false;
                continue;
            }

            if (cp <= 0x7F) {
                if (!appendOneByte(buf, cp, wasSlash, isPath)) {
                    return null;
                }
                wasSlash = cp == '/';
                continue;
            }

            if (cp <= 0x7ff) {
                buf.ensure(2);
                buf.add((byte) ((cp >>> 6) | 0b110_00000));
                buf.add((byte) (cp & 0b111111 | 0b10_000000));
            } else if (cp <= 0xffff) {
                buf.ensure(3);
                buf.add((byte) ((cp >>> 12) | 0b1110_0000));
                buf.add((byte) (((cp >>> 6) & 0b111111) | 0b10_000000));
                buf.add((byte) ((cp & 0b111111) | 0b10_000000));
            } else if (cp <= 0x1fffff) {
                buf.ensure(4);
                buf.add((byte) ((cp >>> 18) | 0b11110_000));
                buf.add((byte) (((cp >>> 12) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 6) & 0b111111) | 0b10_000000));
                buf.add((byte) ((cp & 0b111111) | 0b10_000000));
            } else if (cp <= 0x3ffffff) {
                // A valid unicode character will never reach here, but for completeness.
                // http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML018/0330.html
                buf.ensure(5);
                buf.add((byte) ((cp >>> 24) | 0b111110_00));
                buf.add((byte) (((cp >>> 18) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 12) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 6) & 0b111111) | 0b10_000000));
                buf.add((byte) ((cp & 0b111111) | 0b10_000000));
            } else {
                // A valid unicode character will never reach here, but for completeness.
                // http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML018/0330.html
                buf.ensure(6);
                buf.add((byte) ((cp >>> 30) | 0b1111110_0));
                buf.add((byte) (((cp >>> 24) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 18) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 12) & 0b111111) | 0b10_000000));
                buf.add((byte) (((cp >>> 6) & 0b111111) | 0b10_000000));
                buf.add((byte) ((cp & 0b111111) | 0b10_000000));
            }

            wasSlash = false;
        }

        return buf;
    }

    /**
     * Appends the specified single-byte value to {@code buf} after validating it. In a path, a slash that
     * immediately follows another slash is dropped.
     *
     * @return {@code false} if {@code cp} is a prohibited control character, {@code true} otherwise.
     */
    private static boolean appendOneByte(Bytes buf, int cp, boolean wasSlash, boolean isPath) {
        if (cp == 0x7F) {
            // Reject the control character: 0x7F
            return false;
        }

        if (cp >>> 5 == 0) {
            // Reject the control characters: 0x00..0x1F
            if (isPath) {
                return false;
            } else if (cp != 0x0A && cp != 0x0D && cp != 0x09) {
                // .. except 0x0A (LF), 0x0D (CR) and 0x09 (TAB) because they are used in a form.
                return false;
            }
        }

        if (cp == '/' && isPath) {
            if (!wasSlash) {
                buf.ensure(1);
                buf.add((byte) '/');
            } else {
                // Remove the consecutive slashes: '/path//with///consecutive////slashes'.
            }
        } else {
            buf.ensure(1);
            buf.add((byte) cp);
        }

        return true;
    }

    /**
     * According to RFC 3986 section 3.3, path can contain a colon, except the first segment.
     *
     * <p>Should allow the asterisk character in the path, query, or fragment components of a URL(RFC2396).
     *
     * @return {@code true} if a {@code ':'} appears after the leading byte and before the next {@code '/'}.
     *
     * @see <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC 3986, section 3.3</a>
     */
    private static boolean firstPathComponentContainsColon(Bytes path) {
        final int length = path.length;
        for (int i = 1; i < length; i++) {
            final byte b = path.data[i];
            if (b == '/') {
                break;
            }
            if (b == ':') {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code true} if the specified decoded path contains {@code "/../"} or ends with
     * {@code "/.."}. The byte at index 0 is assumed to be {@code '/'}, which the caller has verified.
     */
    private static boolean pathContainsDoubleDots(Bytes path) {
        final int length = path.length;
        // A sliding window of the last three bytes seen; b2 starts as the leading '/'.
        byte b0 = 0;
        byte b1 = 0;
        byte b2 = '/';
        for (int i = 1; i < length; i++) {
            final byte b3 = path.data[i];
            if (b3 == '/' && b2 == '.' && b1 == '.' && b0 == '/') {
                return true;
            }
            b0 = b1;
            b1 = b2;
            b2 = b3;
        }

        return b0 == '/' && b1 == '.' && b2 == '.';
    }

    /**
     * Encodes the specified decoded bytes back into a {@link String}. Bytes in the allowed set are written
     * as-is unless they were flagged via {@link Bytes#addPercentEncoded(byte)}; a space becomes
     * {@code "%20"} in a path and {@code '+'} in a query; every other byte is written as {@code "%XX"}
     * with upper-case hexadecimal digits.
     */
    @SuppressWarnings("deprecation")
    private static String encodeToPercents(Bytes value, boolean isPath) {
        final BitSet allowedChars = isPath ? ALLOWED_PATH_CHARS : ALLOWED_QUERY_CHARS;
        final int length = value.length;

        // A flagged byte is always in the allowed set, so it has to be accounted for separately.
        boolean needsEncoding = value.hasPercentEncoded();
        for (int i = 0; !needsEncoding && i < length; i++) {
            needsEncoding = !allowedChars.get(value.data[i] & 0xFF);
        }

        if (!needsEncoding) {
            // Deprecated, but it fits perfect for our use case.
            //noinspection deprecation
            return new String(value.data, 0, 0, length);
        }

        final StringBuilder buf = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final int b = value.data[i] & 0xFF;
            if (value.isPercentEncoded(i)) {
                // '&', ';' or '=' that was percent-encoded in the original query: "%26", "%3B" or "%3D".
                appendPercentEncoded(buf, b);
            } else if (allowedChars.get(b)) {
                buf.append((char) b);
            } else if (b == ' ') {
                if (isPath) {
                    buf.append("%20");
                } else {
                    buf.append('+');
                }
            } else {
                // Includes '+' in a query, which is written as "%2B".
                appendPercentEncoded(buf, b);
            }
        }

        return buf.toString();
    }

    /**
     * Appends {@code '%'} followed by the two upper-case hexadecimal digits of the specified byte value.
     */
    private static void appendPercentEncoded(StringBuilder buf, int b) {
        buf.append('%');
        appendHexNibble(buf, b >>> 4);
        appendHexNibble(buf, b & 0xF);
    }

    /**
     * Appends the upper-case hexadecimal digit of the specified value, which must be in {@code 0..15}.
     */
    private static void appendHexNibble(StringBuilder buf, int nibble) {
        if (nibble < 10) {
            buf.append((char) ('0' + nibble));
        } else {
            buf.append((char) ('A' + nibble - 10));
        }
    }

    /**
     * A growable byte buffer. {@link #ensure(int)} must be called before {@link #add(byte)} to guarantee
     * there is enough room.
     */
    private static final class Bytes {
        byte[] data;
        int length;

        /**
         * The indices of the bytes added via {@link #addPercentEncoded(byte)}, or {@code null} if there
         * are none.
         */
        @Nullable
        private BitSet percentEncoded;

        Bytes(int initialCapacity) {
            data = new byte[initialCapacity];
        }

        Bytes(byte[] data) {
            this.data = data;
            length = data.length;
        }

        /**
         * Appends the specified byte without checking the capacity.
         */
        void add(byte b) {
            data[length++] = b;
        }

        /**
         * Appends the specified byte, growing the buffer if necessary, and flags its index so that it is
         * written back in percent-encoded form even if it is an otherwise allowed character.
         */
        void addPercentEncoded(byte b) {
            ensure(1);
            if (percentEncoded == null) {
                percentEncoded = new BitSet();
            }
            percentEncoded.set(length);
            add(b);
        }

        /**
         * Returns whether at least one byte was added via {@link #addPercentEncoded(byte)}.
         */
        boolean hasPercentEncoded() {
            return percentEncoded != null;
        }

        /**
         * Returns whether the byte at the specified index was added via {@link #addPercentEncoded(byte)}.
         */
        boolean isPercentEncoded(int index) {
            return percentEncoded != null && percentEncoded.get(index);
        }

        /**
         * Makes sure at least {@code numBytes} more bytes can be added, growing the backing array by
         * about 50% (capped at {@code Arrays.MAX_ARRAY_SIZE}) or to the exact required size, whichever
         * is larger.
         */
        void ensure(int numBytes) {
            int newCapacity = length + numBytes;
            if (newCapacity <= data.length) {
                return;
            }

            newCapacity =
                    (int) Math.max(Math.min((long) data.length + (data.length >> 1), Arrays.MAX_ARRAY_SIZE),
                                   newCapacity);

            data = ByteArrays.forceCapacity(data, newCapacity, length);
        }
    }

    /**
     * Iterates over the code points of {@code str[start, end)}, allowing the position to be moved.
     */
    private static final class CodePointIterator {
        private final CharSequence str;
        private final int end;
        private int pos;

        CodePointIterator(CharSequence str, int start, int end) {
            this.str = str;
            this.end = end;
            pos = start;
        }

        int position() {
            return pos;
        }

        void position(int pos) {
            this.pos = pos;
        }

        boolean hasNextCodePoint() {
            return pos < end;
        }

        /**
         * Returns the code point at the current position and advances past it. A high surrogate that is
         * not followed by a low surrogate before {@code end} is returned as-is.
         */
        int nextCodePoint() {
            assert pos < end;

            final char c1 = str.charAt(pos++);
            if (Character.isHighSurrogate(c1) && pos < end) {
                final char c2 = str.charAt(pos);
                if (Character.isLowSurrogate(c2)) {
                    pos++;
                    return Character.toCodePoint(c1, c2);
                }
            }

            return c1;
        }
    }
}