io.netty.handler.codec.http.HttpHeaderValidationUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of artemis-jakarta-client-all
There is a newer version: 2.38.0
/*
 * Copyright 2022 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty.handler.codec.http;

import io.netty.util.AsciiString;
import io.netty.util.internal.UnstableApi;

import static io.netty.util.AsciiString.contentEqualsIgnoreCase;

/**
 * Functions used to perform various validations of HTTP header names and values.
 * <p>
 * All methods are static and stateless. The validation methods report problems by returning the index of the
 * first offending character (or {@code -1} when the input is acceptable) rather than by throwing.
 */
@UnstableApi
public final class HttpHeaderValidationUtil {
    private HttpHeaderValidationUtil() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * Check if a header name is "connection related".
     * <p>
     * <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1">RFC 9110</a> only specifies an
     * incomplete list of such headers. This method checks for the following names:
     *
     * <ul>
     *     <li>{@code Connection}</li>
     *     <li>{@code Proxy-Connection}</li>
     *     <li>{@code Keep-Alive}</li>
     *     <li>{@code TE}</li>
     *     <li>{@code Transfer-Encoding}</li>
     *     <li>{@code Upgrade}</li>
     * </ul>
     *
     * @param name the name of the header to check. The check is case-insensitive.
     * @param ignoreTeHeader {@code true} if the TE header should be ignored by this check.
     * This is relevant for HTTP/2 header validation, where the TE header has special rules.
     * @return {@code true} if the given header name is one of the specified connection-related headers.
     */
    @SuppressWarnings("deprecation") // We need to check for deprecated headers as well.
    public static boolean isConnectionHeader(CharSequence name, boolean ignoreTeHeader) {
        // These are the known standard and non-standard connection related headers:
        // - te (2 chars)
        // - upgrade (7 chars)
        // - connection (10 chars)
        // - keep-alive (10 chars)
        // - proxy-connection (16 chars)
        // - transfer-encoding (17 chars)
        //
        // See https://datatracker.ietf.org/doc/html/rfc9113#section-8.2.2
        // and https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1
        // for the list of connection related headers.
        //
        // We scan for these based on the length, then double-check any matching name.
        int len = name.length();
        switch (len) {
            case 2: return !ignoreTeHeader && contentEqualsIgnoreCase(name, HttpHeaderNames.TE);
            case 7: return contentEqualsIgnoreCase(name, HttpHeaderNames.UPGRADE);
            case 10: return contentEqualsIgnoreCase(name, HttpHeaderNames.CONNECTION) ||
                    contentEqualsIgnoreCase(name, HttpHeaderNames.KEEP_ALIVE);
            case 16: return contentEqualsIgnoreCase(name, HttpHeaderNames.PROXY_CONNECTION);
            case 17: return contentEqualsIgnoreCase(name, HttpHeaderNames.TRANSFER_ENCODING);
            default:
                return false;
        }
    }

    /**
     * If the given header is {@link HttpHeaderNames#TE} and the given header value is not
     * {@link HttpHeaderValues#TRAILERS}, then return {@code true}. Otherwise, {@code false}.
     * <p>
     * The string comparisons are case-insensitive.
     * <p>
     * This check is important for HTTP/2 header validation.
     *
     * @param name the header name to check if it is TE or not.
     * @param value the header value to check if it is something other than TRAILERS.
     * @return {@code true} only if the header name is TE, and the header value is not
     * TRAILERS. Otherwise, {@code false}.
     */
    public static boolean isTeNotTrailers(CharSequence name, CharSequence value) {
        // Cheap length test first, so that most header names never reach the content comparison.
        if (name.length() == 2) {
            return contentEqualsIgnoreCase(name, HttpHeaderNames.TE) &&
                    !contentEqualsIgnoreCase(value, HttpHeaderValues.TRAILERS);
        }
        return false;
    }

    /**
     * Validate the given HTTP header value by searching for any illegal characters.
     * <p>
     * An empty value is considered valid.
     *
     * @param value the HTTP header value to validate.
     * @return the index of the first illegal character found, or {@code -1} if there are none and the header value is
     * valid.
     */
    public static int validateValidHeaderValue(CharSequence value) {
        if (value.length() == 0) {
            return -1;
        }
        if (value instanceof AsciiString) {
            return verifyValidHeaderValueAsciiString((AsciiString) value);
        }
        return verifyValidHeaderValueCharSequence(value);
    }

    /**
     * Validate a non-empty {@link AsciiString} header value by reading its backing byte array directly.
     *
     * @param value the non-empty header value to validate.
     * @return the index (relative to the start of {@code value}) of the first illegal byte, or {@code -1} if
     * there are none.
     */
    private static int verifyValidHeaderValueAsciiString(AsciiString value) {
        // Validate value to field-content rule.
        //  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
        //  field-vchar    = VCHAR / obs-text
        //  VCHAR          = %x21-7E ; visible (printing) characters
        //  obs-text       = %x80-FF
        //  SP             = %x20
        //  HTAB           = %x09 ; horizontal tab
        //  See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
        //  And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
        final byte[] array = value.array();
        final int start = value.arrayOffset();
        // The first character must be a field-vchar, so SP and HTAB are rejected here.
        int b = array[start] & 0xFF;
        if (b < 0x21 || b == 0x7F) {
            return 0;
        }
        // The loop index walks the backing array, so the bound has to be an array index as well.
        // Using the bare length here would skip trailing bytes whenever the string has a non-zero offset.
        final int end = start + value.length();
        for (int i = start + 1; i < end; i++) {
            b = array[i] & 0xFF;
            if (b < 0x20 && b != 0x09 || b == 0x7F) {
                return i - start;
            }
        }
        return -1;
    }

    /**
     * Validate a non-empty, arbitrary {@link CharSequence} header value.
     * <p>
     * In addition to the control-character checks, any char above {@code 0xFF} is rejected.
     *
     * @param value the non-empty header value to validate.
     * @return the index of the first illegal character, or {@code -1} if there are none.
     */
    private static int verifyValidHeaderValueCharSequence(CharSequence value) {
        // Validate value to field-content rule.
        //  field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
        //  field-vchar    = VCHAR / obs-text
        //  VCHAR          = %x21-7E ; visible (printing) characters
        //  obs-text       = %x80-FF
        //  SP             = %x20
        //  HTAB           = %x09 ; horizontal tab
        //  See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
        //  And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
        // The first character must be a field-vchar, so SP and HTAB are rejected here.
        int b = value.charAt(0);
        if (b < 0x21 || b == 0x7F || 0xFF < b) {
            return 0;
        }
        int length = value.length();
        for (int i = 1; i < length; i++) {
            b = value.charAt(i);
            if (b < 0x20 && b != 0x09 || b == 0x7F || 0xFF < b) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Validate that a <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2">token</a> contains
     * only allowed characters.
     * <p>
     * The token format is used for a variety of HTTP components, like {@code cookie-name}, the
     * {@code field-name} of a {@code header-field}, or the request method.
     * <p>
     * Note that an empty sequence contains no invalid character, so {@code -1} is returned for it; callers that
     * require a non-empty token must check the length themselves.
     *
     * @param token the token to validate.
     * @return the index of the first invalid token character found, or {@code -1} if there are none.
     */
    public static int validateToken(CharSequence token) {
        if (token instanceof AsciiString) {
            return validateAsciiStringToken((AsciiString) token);
        }
        return validateCharSequenceToken(token);
    }

    /**
     * Validate that an {@link AsciiString} contains only valid
     * <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2">token</a> characters.
     *
     * @param token the ascii string to validate.
     * @return the index (relative to the start of {@code token}) of the first invalid byte, or {@code -1} if
     * there are none.
     */
    private static int validateAsciiStringToken(AsciiString token) {
        final byte[] array = token.array();
        final int start = token.arrayOffset();
        final int end = start + token.length();
        for (int i = start; i < end; i++) {
            if (!BitSet128.contains(array[i], TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
                return i - start;
            }
        }
        return -1;
    }

    /**
     * Validate that a {@link CharSequence} contains only valid
     * <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2">token</a> characters.
     *
     * @param token the character sequence to validate.
     * @return the index of the first invalid character, or {@code -1} if there are none.
     */
    private static int validateCharSequenceToken(CharSequence token) {
        for (int i = 0, len = token.length(); i < len; i++) {
            char c = token.charAt(i);
            // Reject everything outside 7-bit ASCII before narrowing to a byte. A plain (byte) cast keeps only
            // the low 8 bits, so a char such as U+0141 would alias to 'A' (0x41) and be accepted by mistake.
            if (c > 0x7F || !BitSet128.contains((byte) c, TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
                return i;
            }
        }
        return -1;
    }

    /** Bits 64-127 of the set of valid token characters; bit {@code n - 64} is set if char {@code n} is valid. */
    private static final long TOKEN_CHARS_HIGH;
    /** Bits 0-63 of the set of valid token characters; bit {@code n} is set if char {@code n} is valid. */
    private static final long TOKEN_CHARS_LOW;
    static {
        // HEADER
        // header-field   = field-name ":" OWS field-value OWS
        //
        // field-name     = token
        // token          = 1*tchar
        //
        // tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
        //                    / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        //                    / DIGIT / ALPHA
        //                    ; any VCHAR, except delimiters.
        //  Delimiters are chosen
        //   from the set of US-ASCII visual characters not allowed in a token
        //   (DQUOTE and "(),/:;<=>?@[\]{}")
        //
        // COOKIE
        // cookie-pair       = cookie-name "=" cookie-value
        // cookie-name       = token
        // token          = 1*<any CHAR except CTLs or separators>
        // CTL            = <any US-ASCII control character
        //                   (octets 0 - 31) and DEL (127)>
        // separators     = "(" | ")" | "<" | ">" | "@"
        //                      | "," | ";" | ":" | "\" | <">
        //                      | "/" | "[" | "]" | "?" | "="
        //                      | "{" | "}" | SP | HT
        //
        // field-name's token is equivalent to cookie-name's token, we can reuse the tchar mask for both:
        BitSet128 tokenChars = new BitSet128()
                .range('0', '9').range('a', 'z').range('A', 'Z') // Alphanumeric.
                .bits('-', '.', '_', '~') // Unreserved characters.
                .bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|'); // Token special characters.
        TOKEN_CHARS_HIGH = tokenChars.high();
        TOKEN_CHARS_LOW = tokenChars.low();
    }

    /**
     * A small mutable builder for a 128-bit set, stored as two {@code long} words.
     * <p>
     * It is used once, during class initialization, to compute the token character masks. Lookups go through
     * the static {@link #contains(byte, long, long)} method, which works on the two extracted words.
     * Values passed to {@link #range(char, char)} and {@link #bits(char...)} are not range-checked; they are
     * expected to be below 128.
     */
    private static final class BitSet128 {
        private long high;
        private long low;

        /**
         * Add every value from {@code fromInc} to {@code toInc}, both inclusive, to the set.
         *
         * @return this instance, for chaining.
         */
        BitSet128 range(char fromInc, char toInc) {
            for (int bit = fromInc; bit <= toInc; bit++) {
                set(bit);
            }
            return this;
        }

        /**
         * Add each of the given values to the set.
         *
         * @return this instance, for chaining.
         */
        BitSet128 bits(char... bits) {
            for (char bit : bits) {
                set(bit);
            }
            return this;
        }

        /** Set a single bit, choosing the low word for values below 64 and the high word otherwise. */
        private void set(int bit) {
            if (bit < 64) {
                low |= 1L << bit;
            } else {
                high |= 1L << (bit - 64);
            }
        }

        /** @return the word holding bits 64-127. */
        long high() {
            return high;
        }

        /** @return the word holding bits 0-63. */
        long low() {
            return low;
        }

        /**
         * Test whether {@code bit} is a member of the set described by the two given words.
         *
         * @param bit the value to look up; negative values (bytes with the top bit set) are never members.
         * @param high the word holding bits 64-127.
         * @param low the word holding bits 0-63.
         * @return {@code true} if the corresponding bit is set.
         */
        static boolean contains(byte bit, long high, long low) {
            if (bit < 0) {
                return false;
            }
            if (bit < 64) {
                return 0 != (low & (1L << bit));
            }
            return 0 != (high & (1L << (bit - 64)));
        }
    }
}