// org.eclipse.jetty.http.UriCompliance Maven / Gradle / Ivy
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//
package org.eclipse.jetty.http;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableSet;
import static java.util.EnumSet.allOf;
import static java.util.EnumSet.complementOf;
import static java.util.EnumSet.noneOf;
import static java.util.EnumSet.of;
/**
* URI compliance modes for Jetty request handling.
* A Compliance mode consists of a set of {@link Violation}s which are allowed
* when the mode is enabled.
*/
public final class UriCompliance implements ComplianceViolation.Mode
{
    private static final Logger LOG = LoggerFactory.getLogger(UriCompliance.class);

    /**
     * These are URI compliance "violations", which may be allowed by the compliance mode. These are actual
     * violations of the RFC, as they represent additional requirements in excess of the strict compliance of
     * RFC 3986.
     * A compliance mode that contains one or more of these Violations, allows request to violate the corresponding
     * additional requirement.
     */
    public enum Violation implements ComplianceViolation
    {
        /**
         * Allow ambiguous path segments e.g. {@code /foo/%2e%2e/bar}.
         * When allowing this {@code Violation}, the application developer/deployer must ensure that the decoded URI path is not
         * passed to any API that may inadvertently normalize dot or double dot segments.
         * Any resulting '.' characters in the decoded path should be treated as literal characters.
         */
        AMBIGUOUS_PATH_SEGMENT("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path segment"),

        /**
         * Allow ambiguous empty segments e.g. {@code //}.
         * When allowing this {@code Violation}, the application developer/deployer must ensure that the application behaves
         * as desired when it receives a URI path containing {@code //}. Specifically, any URI pattern matching for
         * security concerns needs to be carefully audited.
         */
        AMBIGUOUS_EMPTY_SEGMENT("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI empty segment"),

        /**
         * Allow ambiguous path separator within a URI segment e.g. {@code /foo/b%2fr}.
         * When allowing this {@code Violation}, the application developer/deployer must be aware that the decoded URI path is
         * ambiguous and that it is not possible to distinguish in the decoded path a real path separator versus an encoded
         * separator character. Any URI matching based on decoded segments may be affected by this ambiguity. It is highly
         * recommended that applications using this violation work only with encoded URI paths. Some APIs that return
         * decoded paths may throw an exception rather than return such an ambiguous path.
         */
        AMBIGUOUS_PATH_SEPARATOR("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path separator"),

        /**
         * Allow ambiguous path parameters within a URI segment e.g. {@code /foo/..;/bar} or {@code /foo/%2e%2e;param/bar}.
         * Since a dot or double dot segment with a parameter will not be normalized, then when allowing this {@code Violation},
         * the application developer/deployer must ensure that the decoded URI path is not passed to any API that may
         * inadvertently normalize dot or double dot segments.
         */
        AMBIGUOUS_PATH_PARAMETER("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path parameter"),

        /**
         * Allow ambiguous path encoding within a URI segment e.g. {@code /%2557EB-INF}. When allowing this
         * {@code Violation}, the deployer must ensure that the decoded URI path is not passed to any API that may inadvertently
         * further decode any percent encoded characters. Any resulting {@code %} character in the decoded path should be
         * treated as a literal character.
         */
        AMBIGUOUS_PATH_ENCODING("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path encoding"),

        /**
         * Allow UTF-16 encoding e.g. {@code /foo%u2192bar}.
         */
        UTF16_ENCODINGS("https://www.w3.org/International/iri-edit/draft-duerst-iri.html#anchor29", "UTF-16 encoding"),

        /**
         * Allow Bad UTF-8 encodings to be substituted by the replacement character.
         */
        BAD_UTF8_ENCODING("https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1", "Bad UTF-8 encoding"),

        /**
         * Allow encoded path characters not allowed by the Servlet spec rules.
         */
        SUSPICIOUS_PATH_CHARACTERS("https://jakarta.ee/specifications/servlet/6.0/jakarta-servlet-spec-6.0.html#uri-path-canonicalization", "Suspicious Path Character"),

        /**
         * Allow path characters not allowed in the path portion of the URI and HTTP specs.
         * This would allow characters that fall outside of the {@code unreserved / pct-encoded / sub-delims / ":" / "@"} ABNF
         */
        ILLEGAL_PATH_CHARACTERS("https://datatracker.ietf.org/doc/html/rfc3986#section-3.3", "Illegal Path Character"),

        /**
         * Allow user info in the authority portion of the URI and HTTP specs.
         */
        USER_INFO("https://datatracker.ietf.org/doc/html/rfc9110#name-deprecation-of-userinfo-in-", "Deprecated User Info");

        private final String _url;
        private final String _description;

        Violation(String url, String description)
        {
            _url = url;
            _description = description;
        }

        @Override
        public String getName()
        {
            return name();
        }

        @Override
        public String getURL()
        {
            return _url;
        }

        @Override
        public String getDescription()
        {
            return _description;
        }
    }

    /**
     * The immutable empty set of violations.
     */
    public static final Set<Violation> NO_VIOLATION = Collections.unmodifiableSet(EnumSet.noneOf(Violation.class));

    /**
     * Set of violations that can trigger a HttpURI.isAmbiguous violation.
     */
    public static final Set<Violation> AMBIGUOUS_VIOLATIONS = Collections.unmodifiableSet(EnumSet.of(
        Violation.AMBIGUOUS_EMPTY_SEGMENT,
        Violation.AMBIGUOUS_PATH_ENCODING,
        Violation.AMBIGUOUS_PATH_PARAMETER,
        Violation.AMBIGUOUS_PATH_SEGMENT,
        Violation.AMBIGUOUS_PATH_SEPARATOR));

    /**
     * Set of Violations that apply only to the HttpURI.path section.
     */
    private static final Set<Violation> PATH_VIOLATIONS = Collections.unmodifiableSet(EnumSet.of(
        Violation.AMBIGUOUS_EMPTY_SEGMENT,
        Violation.AMBIGUOUS_PATH_ENCODING,
        Violation.AMBIGUOUS_PATH_PARAMETER,
        Violation.AMBIGUOUS_PATH_SEGMENT,
        Violation.AMBIGUOUS_PATH_SEPARATOR,
        Violation.SUSPICIOUS_PATH_CHARACTERS,
        Violation.ILLEGAL_PATH_CHARACTERS));

    /**
     * Compliance mode that exactly follows RFC3986,
     * excluding all URI Violations.
     */
    public static final UriCompliance RFC3986 = new UriCompliance("RFC3986", noneOf(Violation.class));

    /**
     * Compliance mode that allows all unambiguous violations.
     */
    public static final UriCompliance UNAMBIGUOUS = new UriCompliance("UNAMBIGUOUS",
        complementOf(EnumSet.copyOf(AMBIGUOUS_VIOLATIONS)));

    /**
     * The default compliance mode allows no violations from RFC3986
     * and is equivalent to {@link #RFC3986} compliance.
     */
    public static final UriCompliance DEFAULT = new UriCompliance("DEFAULT", RFC3986.getAllowed());

    /**
     * LEGACY compliance mode that models Jetty-9.4 behavior by allowing {@link Violation#AMBIGUOUS_PATH_SEGMENT},
     * {@link Violation#AMBIGUOUS_EMPTY_SEGMENT}, {@link Violation#AMBIGUOUS_PATH_SEPARATOR}, {@link Violation#AMBIGUOUS_PATH_ENCODING},
     * {@link Violation#UTF16_ENCODINGS} and {@link Violation#USER_INFO}.
     */
    public static final UriCompliance LEGACY = new UriCompliance("LEGACY",
        of(Violation.AMBIGUOUS_PATH_SEGMENT,
            Violation.AMBIGUOUS_PATH_SEPARATOR,
            Violation.AMBIGUOUS_PATH_ENCODING,
            Violation.AMBIGUOUS_EMPTY_SEGMENT,
            Violation.UTF16_ENCODINGS,
            Violation.USER_INFO));

    /**
     * Compliance mode that allows all URI Violations, including allowing ambiguous paths in non-canonical form, and illegal characters
     */
    public static final UriCompliance UNSAFE = new UriCompliance("UNSAFE", allOf(Violation.class));

    // Counter used to give every custom mode a distinct "CUSTOMn" name.
    private static final AtomicInteger __custom = new AtomicInteger();
    private static final List<UriCompliance> KNOWN_MODES = List.of(DEFAULT, LEGACY, RFC3986, UNAMBIGUOUS, UNSAFE);

    /**
     * Test if a set of violations contains any of the {@link #AMBIGUOUS_VIOLATIONS}.
     *
     * @param violations the violations to test
     * @return true if at least one of the violations is an ambiguous violation
     */
    public static boolean isAmbiguous(Set<Violation> violations)
    {
        if (violations.isEmpty())
            return false;
        for (Violation v : AMBIGUOUS_VIOLATIONS)
        {
            if (violations.contains(v))
                return true;
        }
        return false;
    }

    /**
     * Get a known compliance mode by name.
     * A warning is logged when the name is unknown, unless it contains a comma, in which case it is
     * assumed to be a custom specification that {@link #from(String)} will handle.
     *
     * @param name the name of one of the static compliance modes of this class
     * @return the matching mode, or null if the name is not a known mode
     */
    public static UriCompliance valueOf(String name)
    {
        UriCompliance compliance = lookup(name);
        // skip warning if delimited, will be handled by .from() properly as a CUSTOM mode.
        if (compliance == null && (name == null || name.indexOf(',') == -1))
            LOG.warn("Unknown UriCompliance mode {}", name);
        return compliance;
    }

    /**
     * Find a known mode by exact name without logging anything.
     *
     * @param name the name to look for, may be null
     * @return the matching mode, or null if there is none
     */
    private static UriCompliance lookup(String name)
    {
        for (UriCompliance compliance : KNOWN_MODES)
        {
            if (compliance.getName().equals(name))
                return compliance;
        }
        return null;
    }

    /**
     * Create compliance set from a set of allowed Violations.
     *
     * @param violations the set of violations to allow
     * @return a new custom compliance mode allowing the passed violations
     */
    public static UriCompliance from(Set<Violation> violations)
    {
        return new UriCompliance("CUSTOM" + __custom.getAndIncrement(), violations);
    }

    /**
     * Create compliance set from string.
     * <p>
     * Format: {@code <BASE>[,[-]<violation>]...}
     * </p>
     * <p>BASE is one of:</p>
     * <dl>
     * <dt>0</dt><dd>No {@link Violation}s</dd>
     * <dt>*</dt><dd>All {@link Violation}s</dd>
     * <dt>&lt;name&gt;</dt><dd>The name of a static instance of UriCompliance (e.g. {@link UriCompliance#RFC3986}).</dd>
     * </dl>
     * <p>
     * The remainder of the list can contain the names of {@link Violation}s to include them in the mode, or prefixed
     * with a '-' to exclude them from the mode. Examples are:
     * </p>
     * <dl>
     * <dt>{@code 0,AMBIGUOUS_PATH_PARAMETER}</dt><dd>Only allow {@link Violation#AMBIGUOUS_PATH_PARAMETER}</dd>
     * <dt>{@code *,-AMBIGUOUS_PATH_PARAMETER}</dt><dd>Allow all except {@link Violation#AMBIGUOUS_PATH_PARAMETER}</dd>
     * <dt>{@code RFC3986,AMBIGUOUS_PATH_PARAMETER}</dt><dd>Same as RFC3986 plus {@link Violation#AMBIGUOUS_PATH_PARAMETER}</dd>
     * </dl>
     * <p>
     * Whitespace surrounding the spec and around the commas is ignored. A BASE name that is not a known mode
     * is logged as a warning and treated as allowing no violations.
     * </p>
     *
     * @param spec A string describing the compliance
     * @return the UriCompliance instance derived from the string description
     * @throws NullPointerException if spec is null
     * @throws IllegalArgumentException if a listed violation is not the name of a {@link Violation}
     */
    public static UriCompliance from(String spec)
    {
        Objects.requireNonNull(spec, "spec");

        // Whitespace around the commas is already ignored by the split below; strip the ends too so that
        // a spec like "LEGACY " is not silently turned into a custom mode that allows nothing.
        String trimmed = spec.strip();

        // Silent lookup: "0", "*" and delimited specs are valid input and must not log an "unknown mode" warning.
        UriCompliance compliance = lookup(trimmed);
        if (compliance != null)
            return compliance;

        String[] elements = trimmed.split("\\s*,\\s*");
        Set<Violation> violations = switch (elements[0])
        {
            case "0" -> noneOf(Violation.class);
            case "*" -> allOf(Violation.class);
            default ->
            {
                UriCompliance mode = lookup(elements[0]);
                if (mode == null)
                    LOG.warn("Unknown UriCompliance mode {}", elements[0]);
                yield (mode == null) ? noneOf(Violation.class) : copyOf(mode.getAllowed());
            }
        };

        for (int i = 1; i < elements.length; i++)
        {
            String element = elements[i];
            boolean exclude = element.startsWith("-");
            if (exclude)
                element = element.substring(1);
            Violation violation = Violation.valueOf(element);
            if (exclude)
                violations.remove(violation);
            else
                violations.add(violation);
        }

        return new UriCompliance("CUSTOM" + __custom.getAndIncrement(), violations);
    }

    private final String _name;
    private final Set<Violation> _allowed;

    /**
     * Create a compliance mode from a name and the violations it allows.
     * The passed set is copied, so later changes to it do not affect this mode.
     *
     * @param name the name of the mode
     * @param violations the violations allowed by the mode, must not be null
     */
    public UriCompliance(String name, Set<Violation> violations)
    {
        Objects.requireNonNull(violations);
        _name = name;
        _allowed = violations.isEmpty() ? NO_VIOLATION : unmodifiableSet(copyOf(violations));
    }

    @Override
    public boolean allows(ComplianceViolation violation)
    {
        return violation instanceof Violation && _allowed.contains(violation);
    }

    @Override
    public String getName()
    {
        return _name;
    }

    /**
     * Get the set of {@link Violation}s allowed by this compliance mode.
     *
     * @return The immutable set of {@link Violation}s allowed by this compliance mode.
     */
    @Override
    public Set<Violation> getAllowed()
    {
        return _allowed;
    }

    @Override
    public Set<Violation> getKnown()
    {
        return EnumSet.allOf(Violation.class);
    }

    /**
     * Create a new UriCompliance mode that includes the passed {@link Violation}s.
     *
     * @param name The name of the new mode
     * @param violations The violations to include
     * @return A new {@link UriCompliance} mode.
     */
    public UriCompliance with(String name, Violation... violations)
    {
        // copyOf(Set) already yields an empty mutable set when nothing is allowed.
        Set<Violation> union = copyOf(_allowed);
        union.addAll(copyOf(violations));
        return new UriCompliance(name, union);
    }

    /**
     * Create a new UriCompliance mode that excludes the passed {@link Violation}s.
     *
     * @param name The name of the new mode
     * @param violations The violations to exclude
     * @return A new {@link UriCompliance} mode.
     */
    public UriCompliance without(String name, Violation... violations)
    {
        // copyOf(Set) already yields an empty mutable set when nothing is allowed.
        Set<Violation> remainder = copyOf(_allowed);
        remainder.removeAll(copyOf(violations));
        return new UriCompliance(name, remainder);
    }

    /**
     * Test if violation is referencing a HttpURI.path violation.
     *
     * @param violation the violation to test.
     * @return true if violation is a path violation.
     */
    public static boolean isPathViolation(UriCompliance.Violation violation)
    {
        return PATH_VIOLATIONS.contains(violation);
    }

    @Override
    public String toString()
    {
        return String.format("%s%s", _name, _allowed);
    }

    /**
     * Copy an array of violations into a new mutable set.
     *
     * @param violations the violations to copy, may be null or empty
     * @return a new mutable set containing the violations
     */
    private static Set<Violation> copyOf(Violation[] violations)
    {
        if (violations == null || violations.length == 0)
            return EnumSet.noneOf(Violation.class);
        return EnumSet.copyOf(asList(violations));
    }

    /**
     * Copy a set of violations into a new mutable set.
     *
     * @param violations the violations to copy, may be null or empty
     * @return a new mutable set containing the violations
     */
    private static Set<Violation> copyOf(Set<Violation> violations)
    {
        // EnumSet.copyOf rejects an empty collection that is not itself an EnumSet, so handle empty input here.
        if (violations == null || violations.isEmpty())
            return EnumSet.noneOf(Violation.class);
        return EnumSet.copyOf(violations);
    }

    /**
     * Check the violations recorded on a URI against a compliance mode.
     * The listener, if not null, is notified of every violation that is not allowed. When the
     * compliance is null, every violation of the URI is treated as not allowed.
     *
     * @param compliance the compliance mode to check against, may be null
     * @param uri the URI whose violations are checked
     * @param listener the listener to notify of violations that are not allowed, may be null
     * @return the comma separated descriptions of the violations that are not allowed,
     * or null if there are none
     */
    public static String checkUriCompliance(UriCompliance compliance, HttpURI uri, ComplianceViolation.Listener listener)
    {
        if (uri.hasViolations())
        {
            StringBuilder violations = null;
            for (UriCompliance.Violation violation : uri.getViolations())
            {
                if (compliance == null || !compliance.allows(violation))
                {
                    if (listener != null)
                        listener.onComplianceViolation(new ComplianceViolation.Event(compliance, violation, uri.toString()));

                    if (violations == null)
                        violations = new StringBuilder();
                    else
                        violations.append(", ");
                    violations.append(violation.getDescription());
                }
            }
            if (violations != null)
                return violations.toString();
        }
        return null;
    }
}