All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.springframework.web.util.WhatWgUrlParser Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2002-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.web.util;

import java.net.IDN;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.function.IntPredicate;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.springframework.lang.Nullable;
import org.springframework.util.Assert;

/**
 * Implementation of the
 * URL parsing algorithm
 * of the WhatWG URL Living standard. Browsers use this algorithm to align on
 * lenient parsing of user typed URL's that may deviate from RFC syntax.
 * Use this, via {@link UriComponentsBuilder.ParserType#WHAT_WG}, if you need to
 * leniently handle URL's that don't confirm to RFC syntax, or for alignment
 * with browser behavior.
 *
 * 

Comments in this class correlate to the parsing algorithm. * The implementation differs from the spec in the following ways: *

    *
  • Supports URI template variables within URI components. *
  • Consequently, the port is a String and not an integer. *
  • Prepends '/' to each segment to ensure trailing slashes are significant. *
* All of these modifications have been indicated through comments that start * with {@code EXTRA}. * * @author Arjen Poutsma * @since 6.2 */ @SuppressWarnings({"SameParameterValue", "BooleanMethodIsAlwaysInverted"}) final class WhatWgUrlParser { public static final UrlRecord EMPTY_RECORD = new UrlRecord(); private static final Log logger = LogFactory.getLog(WhatWgUrlParser.class); private static final int EOF = -1; private static final int MAX_PORT = 65535; private final StringBuilder input; @Nullable private final UrlRecord base; @Nullable private Charset encoding; @Nullable private final Consumer validationErrorHandler; private int pointer; private final StringBuilder buffer; @Nullable private State state; @Nullable private State stateOverride; private boolean atSignSeen; private boolean passwordTokenSeen; private boolean insideBrackets; private int openCurlyBracketCount; private boolean stopMainLoop = false; private WhatWgUrlParser( String input, @Nullable UrlRecord base, @Nullable Charset encoding, @Nullable Consumer validationErrorHandler) { this.input = new StringBuilder(input); this.base = base; this.encoding = encoding; this.validationErrorHandler = validationErrorHandler; this.buffer = new StringBuilder(this.input.length() / 2); } /** * Parse the given input into a URL record. * @param input the scalar value string * @param base the optional base URL to resolve relative URLs against. If * {@code null}, relative URLs cannot be parsed. * @param encoding the optional encoding to use. If {@code null}, no * encoding is performed. * @param validationErrorHandler optional consumer for non-fatal URL * validation messages * @return a URL record, as defined in the * living URL * specification * @throws InvalidUrlException if the {@code input} does not contain a * parsable URL */ public static UrlRecord parse(String input, @Nullable UrlRecord base, @Nullable Charset encoding, @Nullable Consumer validationErrorHandler) throws InvalidUrlException { Assert.notNull(input, "Input must not be null"); WhatWgUrlParser parser = new WhatWgUrlParser(input, base, encoding, validationErrorHandler); return parser.basicUrlParser(null, null); } /** * The basic URL parser takes a scalar value string input, with an optional * null or base URL base (default null), an optional encoding (default UTF-8), * and optionally, a UrlRecord and/or State overrides to start from. */ private UrlRecord basicUrlParser(@Nullable UrlRecord url, @Nullable State stateOverride) { // If url is not given: if (url == null) { // Set url to a new URL. url = new UrlRecord(); sanitizeInput(true); } else { sanitizeInput(false); } // Let state be state override if given, or scheme start state otherwise. this.state = (stateOverride != null ? stateOverride : State.SCHEME_START); this.stateOverride = stateOverride; // Keep running the following state machine by switching on state. // If after a run pointer points to the EOF code point, go to the next step. // Otherwise, increase pointer by 1 and continue with the state machine. while (!this.stopMainLoop && this.pointer <= this.input.length()) { int c; if (this.pointer < this.input.length()) { c = this.input.codePointAt(this.pointer); } else { c = EOF; } if (logger.isTraceEnabled()) { logger.trace("current: " + (c != EOF ? Character.toString(c) : "EOF") + " ptr: " + this.pointer + " Buffer: " + this.buffer + " State: " + this.state); } this.state.handle(c, url, this); this.pointer++; } return url; } void sanitizeInput(boolean removeC0ControlOrSpace) { boolean strip = true; for (int i = 0; i < this.input.length(); i++) { int c = this.input.codePointAt(i); boolean isSpaceOrC0 = (c == ' ' || isC0Control(c)); boolean isTabOrNL = (c == '\t' || isNewline(c)); if ((strip && isSpaceOrC0) || isTabOrNL) { if (validate()) { // If input contains leading (or trailing) C0 control or space, invalid-URL-unit validation error. // If input contains ASCII tab or newline, invalid-URL-unit validation error. validationError("Code point \"" + c + "\" is not a URL unit."); } // Remove any leading C0 control or space from input. if (removeC0ControlOrSpace && isSpaceOrC0) { this.input.deleteCharAt(i); } else if (isTabOrNL) { // Remove all ASCII tab or newline from input. this.input.deleteCharAt(i); } i--; } else { strip = false; } } if (removeC0ControlOrSpace) { for (int i = this.input.length() - 1; i >= 0; i--) { int c = this.input.codePointAt(i); if (c == ' ' || isC0Control(c)) { if (validate()) { // If input contains (leading or) trailing C0 control or space, invalid-URL-unit validation error. validationError("Code point \"" + c + "\" is not a URL unit."); } // Remove any trailing C0 control or space from input. this.input.deleteCharAt(i); } else { break; } } } } private void setState(State newState) { if (logger.isTraceEnabled()) { String c; if (this.pointer < this.input.length()) { c = Character.toString(this.input.codePointAt(this.pointer)); } else { c = "EOF"; } logger.trace("Changing state from " + this.state + " to " + newState + " (cur: " + c + ")"); } this.state = newState; this.openCurlyBracketCount = (this.buffer.toString().equals("{") ? this.openCurlyBracketCount : 0); } private boolean processCurlyBrackets(int c) { if (c == '{') { this.openCurlyBracketCount++; return true; } if (c == '}') { if (this.openCurlyBracketCount > 0) { this.openCurlyBracketCount--; return true; } return false; } return (this.openCurlyBracketCount > 0 && c != EOF); } private static LinkedList strictSplit(String input, int delimiter) { // Let position be a position variable for input, initially pointing at the start of input. int position = 0; // Let tokens be a list of strings, initially empty. LinkedList tokens = new LinkedList<>(); // Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position. int delIdx = input.indexOf(delimiter, position); String token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position); position = delIdx; // Append token to tokens. tokens.add(token); // While position is not past the end of input: while (position != EOF) { // Assert: the code point at position within input is delimiter. Assert.state(input.codePointAt(position) == delimiter, "Codepoint is not a delimiter"); // Advance position by 1. position++; delIdx = input.indexOf(delimiter, position); // Let token be the result of collecting a sequence of code points // that are not equal to delimiter from input, given position. token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position); position = delIdx; // Append token to tokens. tokens.add(token); } return tokens; } private static String domainToAscii(String domain, boolean beStrict) { // If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) // does not produce any item that starts with an ASCII case-insensitive match for "xn--", // this step is equivalent to ASCII lowercasing domain. if (!beStrict && containsOnlyAscii(domain)) { int dotIdx = domain.indexOf('.'); boolean onlyLowerCase = true; while (dotIdx != -1) { if (domain.length() - dotIdx > 4) { // ASCII case-insensitive match for "xn--" int ch0 = domain.codePointAt(dotIdx + 1); int ch1 = domain.codePointAt(dotIdx + 2); int ch2 = domain.codePointAt(dotIdx + 3); int ch3 = domain.codePointAt(dotIdx + 4); if ((ch0 == 'x' || ch0 == 'X') && (ch1 == 'n' || ch1 == 'N') && ch2 == '-' && ch3 == '_') { onlyLowerCase = false; break; } } dotIdx = domain.indexOf('.', dotIdx + 1); } if (onlyLowerCase) { return domain.toLowerCase(Locale.ENGLISH); } } // Let result be the result of running Unicode ToASCII (https://www.unicode.org/reports/tr46/#ToASCII) // with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, // CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, // and VerifyDnsLength set to beStrict. [UTS46] int flag = 0; if (beStrict) { flag |= IDN.USE_STD3_ASCII_RULES; } // Implementation note: implementing Unicode ToASCII is beyond the scope of this parser, // we use java.net.IDN.toASCII try { return IDN.toASCII(domain, flag); } catch (IllegalArgumentException ex) { throw new InvalidUrlException( "Could not convert \"" + domain + "\" to ASCII: " + ex.getMessage(), ex); } } private boolean validate() { return this.validationErrorHandler != null; } private void validationError(@Nullable String additionalInfo) { if (this.validationErrorHandler != null) { StringBuilder message = new StringBuilder("URL validation error for URL ["); message.append(this.input); message.append("]@"); message.append(this.pointer); if (additionalInfo != null) { message.append(". "); message.append(additionalInfo); } this.validationErrorHandler.accept(message.toString()); } } private void failure(@Nullable String additionalInfo) { StringBuilder message = new StringBuilder("URL parsing failure for URL ["); message.append(this.input); message.append("] @ "); message.append(this.pointer); if (additionalInfo != null) { message.append(". "); message.append(additionalInfo); } throw new InvalidUrlException(message.toString()); } /** * The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~). */ private static boolean c0ControlPercentEncodeSet(int ch) { return (isC0Control(ch) || Integer.compareUnsigned(ch, '~') > 0); } /** * The fragment percent-encode set is the C0 control percent-encode set and * U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`). */ private static boolean fragmentPercentEncodeSet(int ch) { return (c0ControlPercentEncodeSet(ch) || ch == ' ' || ch == '"' || ch == '<' || ch == '>' || ch == '`'); } /** * The query percent-encode set is the C0 control percent-encode set and * U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>). */ private static boolean queryPercentEncodeSet(int ch) { return (c0ControlPercentEncodeSet(ch) || ch == ' ' || ch == '"' || ch == '#' || ch == '<' || ch == '>'); } /** * The special-query percent-encode set is the query percent-encode set and U+0027 ('). */ private static boolean specialQueryPercentEncodeSet(int ch) { return (queryPercentEncodeSet(ch) || ch == '\''); } /** * The path percent-encode set is the query percent-encode set and * U+003F (?), U+0060 (`), U+007B ({), and U+007D (}). */ private static boolean pathPercentEncodeSet(int ch) { return (queryPercentEncodeSet(ch) || ch == '?' || ch == '`' || ch == '{' || ch == '}'); } /** * The userinfo percent-encode set is the path percent-encode set and * U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), * U+005B ([) to U+005E (^), inclusive, and U+007C (|). */ private static boolean userinfoPercentEncodeSet(int ch) { return (pathPercentEncodeSet(ch) || ch == '/' || ch == ':' || ch == ';' || ch == '=' || ch == '@' || (Integer.compareUnsigned(ch, '[') >= 0 && Integer.compareUnsigned(ch, '^') <= 0) || ch == '|'); } private static boolean isC0Control(int ch) { return (ch >= 0 && ch <= 0x1F); } private static boolean isNewline(int ch) { return (ch == '\r' || ch == '\n'); } private static boolean isAsciiAlpha(int ch) { return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); } private static boolean containsOnlyAsciiDigits(CharSequence string) { for (int i=0; i< string.length(); i++ ) { int ch = codePointAt(string, i); if (!isAsciiDigit(ch)) { return false; } } return true; } private static boolean containsOnlyAscii(String string) { for (int i = 0; i < string.length(); i++) { int ch = string.codePointAt(i); if (!isAsciiCodePoint(ch)) { return false; } } return true; } private static boolean isAsciiCodePoint(int ch) { // An ASCII code point is a code point in the range U+0000 NULL to U+007F DELETE, inclusive. return (Integer.compareUnsigned(ch, 0) >= 0 && Integer.compareUnsigned(ch, 127) <= 0); } private static boolean isAsciiDigit(int ch) { return (ch >= '0' && ch <= '9'); } private static boolean isAsciiAlphaNumeric(int ch) { return (isAsciiAlpha(ch) || isAsciiDigit(ch)); } private static boolean isAsciiHexDigit(int ch) { return (isAsciiDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')); } private static boolean isForbiddenDomain(int ch) { return (isForbiddenHost(ch) || isC0Control(ch) || ch == '%' || ch == 0x7F); } private static boolean isForbiddenHost(int ch) { return (ch == 0x00 || ch == '\t' || isNewline(ch) || ch == ' ' || ch == '#' || ch == '/' || ch == ':' || ch == '<' || ch == '>' || ch == '?' || ch == '@' || ch == '[' || ch == '\\' || ch == ']' || ch == '^' || ch == '|'); } private static boolean isNonCharacter(int ch) { return ((ch >= 0xFDD0 && ch <= 0xFDEF) || ch == 0xFFFE || ch == 0xFFFF || ch == 0x1FFFE || ch == 0x1FFFF || ch == 0x2FFFE || ch == 0x2FFFF || ch == 0x3FFFE || ch == 0x3FFFF || ch == 0x4FFFE || ch == 0x4FFFF || ch == 0x5FFFE || ch == 0x5FFFF || ch == 0x6FFFE || ch == 0x6FFFF || ch == 0x7FFFE || ch == 0x7FFFF || ch == 0x8FFFE || ch == 0x8FFFF || ch == 0x9FFFE || ch == 0x9FFFF || ch == 0xAFFFE || ch == 0xAFFFF || ch == 0xBFFFE || ch == 0xBFFFF || ch == 0xCFFFE || ch == 0xCFFFF || ch == 0xDFFFE || ch == 0xDFFFF || ch == 0xEFFFE || ch == 0xEFFFF || ch == 0xFFFFE || ch == 0xFFFFF || ch == 0x10FFFE || ch == 0x10FFFF); } @SuppressWarnings("BooleanMethodIsAlwaysInverted") private static boolean isUrlCodePoint(int ch) { return (isAsciiAlphaNumeric(ch) || ch == '!' || ch == '$' || ch == '&' || ch == '\'' || ch == '(' || ch == ')' || ch == '*' || ch == '+' || ch == ',' || ch == '-' || ch == '.' || ch == '/' || ch == ':' || ch == ';' || ch == '=' || ch == '?' || ch == '@' || ch == '_' || ch == '~' || (ch >= 0x00A0 && ch <= 0x10FFFD && !Character.isSurrogate((char) ch) && !isNonCharacter(ch))); } private static boolean isSpecialScheme(String scheme) { return ("ftp".equals(scheme) || "file".equals(scheme) || "http".equals(scheme) || "https".equals(scheme) || "ws".equals(scheme) || "wss".equals(scheme)); } private static int defaultPort(@Nullable String scheme) { if (scheme != null) { return switch (scheme) { case "ftp" -> 21; case "http", "ws" -> 80; case "https", "wss" -> 443; default -> -1; }; } else { return -1; } } private void append(String s) { this.buffer.append(s); } private void append(char ch) { this.buffer.append(ch); } private void append(int ch) { this.buffer.appendCodePoint(ch); } private void prepend(String s) { this.buffer.insert(0, s); } private void emptyBuffer() { this.buffer.setLength(0); } private int remaining(int deltaPos) { int pos = this.pointer + deltaPos + 1; return (pos < this.input.length() ? this.input.codePointAt(pos) : EOF); } private static String percentDecode(String input) { try { return UriUtils.decode(input, StandardCharsets.UTF_8); } catch (IllegalArgumentException ex) { throw new InvalidUrlException("Could not decode \"" + input + "\": " + ex.getMessage(), ex); } } @Nullable private String percentEncode(int c, IntPredicate percentEncodeSet) { if (this.encoding == null) { return null; } else { return percentEncode(Character.toString(c), percentEncodeSet); } } private String percentEncode(String input, IntPredicate percentEncodeSet) { if (this.encoding == null) { return input; } else { byte[] bytes = input.getBytes(this.encoding); boolean original = true; for (byte b : bytes) { if (percentEncodeSet.test(b)) { original = false; break; } } if (original) { return input; } StringBuilder output = new StringBuilder(); for (byte b : bytes) { if (!percentEncodeSet.test(b)) { output.append((char)b); } else { output.append('%'); char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); output.append(hex1); output.append(hex2); } } return output.toString(); } } /** * A single-dot URL path segment is a URL path segment that is "[/]." or * an ASCII case-insensitive match for "[/]%2e". */ private static boolean isSingleDotPathSegment(StringBuilder b) { int len = b.length(); switch (len) { case 1 -> { int ch0 = b.codePointAt(0); return ch0 == '.'; } case 2 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); return ch0 == '/' && ch1 == '.'; } case 3 -> { // ASCII case-insensitive match for "%2e". int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E'); } case 4 -> { // ASCII case-insensitive match for "/%2e". int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); int ch3 = b.codePointAt(3); return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E'); } default -> { return false; } } } /** * A double-dot URL path segment is a URL path segment that is "[/].." or * an ASCII case-insensitive match for "/.%2e", "/%2e.", or "/%2e%2e". */ private static boolean isDoubleDotPathSegment(StringBuilder b) { int len = b.length(); switch (len) { case 2 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); return (ch0 == '.' && ch1 == '.'); } case 3 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); return (ch0 == '/' && ch1 == '.' && ch2 == '.'); } case 4 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); int ch3 = b.codePointAt(3); // case-insensitive match for ".%2e" or "%2e." return (ch0 == '.' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') || (ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') && ch3 == '.')); } case 5 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); int ch3 = b.codePointAt(3); int ch4 = b.codePointAt(4); // case-insensitive match for "/.%2e" or "/%2e." return (ch0 == '/' && (ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E') || (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.'))); } case 6 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); int ch3 = b.codePointAt(3); int ch4 = b.codePointAt(4); int ch5 = b.codePointAt(5); // case-insensitive match for "%2e%2e". return (ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') && ch3 == '%' && ch4 == '2' && (ch5 == 'e' || ch5 == 'E')); } case 7 -> { int ch0 = b.codePointAt(0); int ch1 = b.codePointAt(1); int ch2 = b.codePointAt(2); int ch3 = b.codePointAt(3); int ch4 = b.codePointAt(4); int ch5 = b.codePointAt(5); int ch6 = b.codePointAt(6); // case-insensitive match for "/%2e%2e". return (ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E')); } default -> { return false; } } } /** * A Windows drive letter is two code points, of which the first is an ASCII alpha * and the second is either U+003A {@code (:)} or U+007C {@code (|)}. * A normalized Windows drive letter is a Windows drive letter of which * the second code point is U+003A {@code (:)}. */ private static boolean isWindowsDriveLetter(CharSequence input, boolean normalized) { if (input.length() != 2) { return false; } return isWindowsDriveLetterInternal(input, normalized); } /** * A string starts with a Windows drive letter if all the following are true: * its length is greater than or equal to 2 * its first two code points are a Windows drive letter * its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#). */ private static boolean startsWithWindowsDriveLetter(String input) { int len = input.length(); if (len < 2) { return false; } if (!isWindowsDriveLetterInternal(input, false)) { return false; } if (len == 2) { return true; } else { int ch2 = input.codePointAt(2); return (ch2 == '/' || ch2 == '\\' || ch2 == '?' || ch2 == '#'); } } private static boolean isWindowsDriveLetterInternal(CharSequence s, boolean normalized) { int ch0 = codePointAt(s, 0); if (!isAsciiAlpha(ch0)) { return false; } else { int ch1 = codePointAt(s, 1); if (normalized) { return ch1 == ':'; } else { return ch1 == ':' || ch1 == '|'; } } } private static int codePointAt(CharSequence s, int index) { if (s instanceof String string) { return string.codePointAt(index); } else if (s instanceof StringBuilder builder) { return builder.codePointAt(index); } else { throw new IllegalStateException(); } } private enum State { SCHEME_START { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state. if (isAsciiAlpha(c)) { p.append(p.openCurlyBracketCount == 0 ? Character.toLowerCase((char) c) : c); p.setState(SCHEME); } // EXTRA: if c is '{', append to buffer and continue as SCHEME else if (c == '{') { p.openCurlyBracketCount++; p.append(c); p.setState(SCHEME); } // Otherwise, if state override is not given, // set state to no scheme state and decrease pointer by 1. else if (p.stateOverride == null) { p.setState(NO_SCHEME); p.pointer--; } // Otherwise, return failure. else { p.failure(null); } } }, SCHEME { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E (.), append c, lowercased, to buffer. if (isAsciiAlphaNumeric(c) || (c == '+' || c == '-' || c == '.')) { p.append(p.openCurlyBracketCount == 0 ? Character.toLowerCase((char) c) : c); } // Otherwise, if c is U+003A (:), then: else if (c == ':') { // If state override is given, then: if (p.stateOverride != null) { boolean urlSpecialScheme = url.isSpecial(); String bufferString = p.buffer.toString(); boolean bufferSpecialScheme = isSpecialScheme(bufferString); // If url’s scheme is a special scheme and buffer is not a special scheme, then return. if (urlSpecialScheme && !bufferSpecialScheme) { return; } // If url’s scheme is not a special scheme and buffer is a special scheme, then return. if (!urlSpecialScheme && bufferSpecialScheme) { return; } // If url includes credentials or has a non-null port, and buffer is "file", then return. if ((url.includesCredentials() || url.port() != null) && "file".equals(bufferString)) { return; } // If url’s scheme is "file" and its host is an empty host, then return. if ("file".equals(url.scheme()) && (url.host() == null || url.host() == EmptyHost.INSTANCE)) { return; } } // Set url’s scheme to buffer. url.scheme = p.buffer.toString(); // If state override is given, then: if (p.stateOverride != null) { // If url’s port is url’s scheme’s default port, then set url’s port to null. if (url.port instanceof IntPort intPort && intPort.value() == defaultPort(url.scheme)) { url.port = null; // Return. p.stopMainLoop = true; return; } } // Set buffer to the empty string. p.emptyBuffer(); // If url’s scheme is "file", then: if (url.scheme.equals("file")) { // If remaining does not start with "//", // special-scheme-missing-following-solidus validation error. if (p.validate() && (p.remaining(0) != '/' || p.remaining(1) != '/')) { p.validationError("\"file\" scheme not followed by \"//\"."); } // Set state to file state. p.setState(FILE); } // Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme: else if (url.isSpecial() && p.base != null && p.base.scheme().equals(url.scheme)) { // Assert: base is special (and therefore does not have an opaque path). Assert.state(!p.base.path().isOpaque(), "Opaque path not expected"); // Set state to special relative or authority state. p.setState(SPECIAL_RELATIVE_OR_AUTHORITY); } // Otherwise, if url is special, set state to special authority slashes state. else if (url.isSpecial()) { p.setState(SPECIAL_AUTHORITY_SLASHES); } // Otherwise, if remaining starts with an U+002F (/), // set state to path or authority state and increase pointer by 1. else if (p.remaining(0) == '/') { p.setState(PATH_OR_AUTHORITY); p.pointer++; } // Otherwise, set url’s path to the empty string and set state to opaque path state. else { url.path = new PathSegment(""); p.setState(OPAQUE_PATH); } } // EXTRA: if c is within URI variable, keep appending else if (p.processCurlyBrackets(c)) { p.append(c); } // Otherwise, if state override is not given, set buffer to the empty string, // state to no scheme state, and start over (from the first code point in input). else if (p.stateOverride == null) { p.emptyBuffer(); p.setState(NO_SCHEME); p.pointer = -1; } // Otherwise, return failure. else { p.failure(null); } } }, NO_SCHEME { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If base is null, or base has an opaque path and c is not U+0023 (#), // missing-scheme-non-relative-URL validation error, return failure. if (p.base == null || p.base.path().isOpaque() && c != '#') { p.failure("The input is missing a scheme, because it does not begin with an ASCII alpha \"" + (c != EOF ? Character.toString(c) : "") + "\", and no base URL was provided."); } // Otherwise, if base has an opaque path and c is U+0023 (#), // set url’s scheme to base’s scheme, url’s path to base’s path, // url’s query to base’s query, url’s fragment to the empty string, // and set state to fragment state. else if (p.base.path().isOpaque() && c == '#') { url.scheme = p.base.scheme(); url.path = p.base.path(); url.query = p.base.query; url.fragment = new StringBuilder(); p.setState(FRAGMENT); } // Otherwise, if base’s scheme is not "file", // set state to relative state and decrease pointer by 1. else if (!"file".equals(p.base.scheme())) { p.setState(RELATIVE); p.pointer--; } // Otherwise, set state to file state and decrease pointer by 1. else { p.setState(FILE); p.pointer--; } } }, SPECIAL_RELATIVE_OR_AUTHORITY { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and // increase pointer by 1. if (c == '/' && p.remaining(0) == '/') { p.setState(SPECIAL_AUTHORITY_IGNORE_SLASHES); p.pointer++; } // Otherwise, special-scheme-missing-following-solidus validation error, // set state to relative state and decrease pointer by 1. else { if (p.validate()) { p.validationError("The input’s scheme is not followed by \"//\"."); } p.setState(RELATIVE); p.pointer--; } } }, PATH_OR_AUTHORITY { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+002F (/), then set state to authority state. if (c == '/') { p.setState(AUTHORITY); } // Otherwise, set state to path state, and decrease pointer by 1. else { p.setState(PATH); p.pointer--; } } }, RELATIVE { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // Assert: base’s scheme is not "file". Assert.state(p.base != null && !"file".equals(p.base.scheme()), "Base scheme not provided or supported"); // Set url’s scheme to base’s scheme. url.scheme = p.base.scheme; // If c is U+002F (/), then set state to relative slash state. if (c == '/') { // EXTRA : append '/' to let the path segment start with / p.append('/'); p.setState(RELATIVE_SLASH); } // Otherwise, if url is special and c is U+005C (\), // invalid-reverse-solidus validation error, set state to relative slash state. else if (url.isSpecial() && c == '\\') { if (p.validate()) { p.validationError("URL uses \\ instead of /."); } // EXTRA : append '/' to let the path segment start with / p.append('/'); p.setState(RELATIVE_SLASH); } // Otherwise else { // Set url’s username to base’s username, url’s password to base’s password, // url’s host to base’s host, url’s port to base’s port, // url’s path to a clone of base’s path, and url’s query to base’s query. url.username = ((p.base.username != null) ? new StringBuilder(p.base.username) : null); url.password = ((p.base.password != null) ? new StringBuilder(p.base.password) : null); url.host = p.base.host(); url.port = p.base.port(); url.path = p.base.path().clone(); url.query = p.base.query; // If c is U+003F (?), then set url’s query to the empty string, and state to query state. if (c == '?') { url.query = new StringBuilder(); p.setState(QUERY); } // Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state. else if (c == '#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } // Otherwise, if c is not the EOF code point: else if (c != EOF) { // Set url’s query to null. url.query = null; // Shorten url’s path. url.shortenPath(); // Set state to path state and decrease pointer by 1. p.setState(PATH); p.pointer--; } } } }, RELATIVE_SLASH { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If url is special and c is U+002F (/) or U+005C (\), then: if (url.isSpecial() && (c == '/' || c == '\\')) { // If c is U+005C (\), invalid-reverse-solidus validation error. if (p.validate() && c == '\\') { p.validationError("URL uses \\ instead of /."); } // Set state to special authority ignore slashes state. p.setState(SPECIAL_AUTHORITY_IGNORE_SLASHES); } // Otherwise, if c is U+002F (/), then set state to authority state. else if (c == '/') { // EXTRA: empty buffer to remove appended slash, since this is not a path p.emptyBuffer(); p.setState(AUTHORITY); } // Otherwise, set url’s username to base’s username, url’s password to base’s password, // url’s host to base’s host, url’s port to base’s port, state to path state, // and then, decrease pointer by 1. else { Assert.state(p.base != null, "No base URL available"); url.username = (p.base.username != null) ? new StringBuilder(p.base.username) : null; url.password = (p.base.password != null) ? new StringBuilder(p.base.password) : null; url.host = p.base.host(); url.port = p.base.port(); p.setState(PATH); p.pointer--; } } }, SPECIAL_AUTHORITY_SLASHES { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and // increase pointer by 1. if (c == '/' && p.remaining(0) == '/') { p.setState(SPECIAL_AUTHORITY_IGNORE_SLASHES); p.pointer++; } // Otherwise, special-scheme-missing-following-solidus validation error, // set state to special authority ignore slashes state and decrease pointer by 1. else { if (p.validate()) { p.validationError("Scheme \"" + url.scheme + "\" not followed by \"//\"."); } p.setState(SPECIAL_AUTHORITY_IGNORE_SLASHES); p.pointer--; } } }, SPECIAL_AUTHORITY_IGNORE_SLASHES { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is neither U+002F (/) nor U+005C (\), // then set state to authority state and decrease pointer by 1. if (c != '/' && c != '\\') { p.setState(AUTHORITY); p.pointer--; } // Otherwise, special-scheme-missing-following-solidus validation error. else { if (p.validate()) { p.validationError("Scheme \"" + url.scheme + "\" not followed by \"//\"."); } } } }, AUTHORITY { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+0040 (@), then: if (c == '@') { // Invalid-credentials validation error. if (p.validate()) { p.validationError("Invalid credentials"); } // If atSignSeen is true, then prepend "%40" to buffer. if (p.atSignSeen) { p.prepend("%40"); } // Set atSignSeen to true. p.atSignSeen = true; int bufferLen = p.buffer.length(); // For each codePoint in buffer: for (int i = 0; i < bufferLen; i++) { int codePoint = p.buffer.codePointAt(i); // If codePoint is U+003A (:) and passwordTokenSeen is false, // then set passwordTokenSeen to true and continue. if (codePoint == ':' && !p.passwordTokenSeen) { p.passwordTokenSeen = true; continue; } // Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint // using the userinfo percent-encode set. String encodedCodePoints = p.percentEncode(codePoint, WhatWgUrlParser::userinfoPercentEncodeSet); // If passwordTokenSeen is true, then append encodedCodePoints to url’s password. if (p.passwordTokenSeen) { if (encodedCodePoints != null) { url.appendToPassword(encodedCodePoints); } else { url.appendToPassword(codePoint); } } // Otherwise, append encodedCodePoints to url’s username. else { if (encodedCodePoints != null) { url.appendToUsername(encodedCodePoints); } else { url.appendToUsername(codePoint); } } } // Set buffer to the empty string. p.emptyBuffer(); } // Otherwise, if one of the following is true: // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) // - url is special and c is U+005C (\) else if ((c == EOF || c == '/' || c == '?' || c == '#') || (url.isSpecial() && c == '\\')) { // If atSignSeen is true and buffer is the empty string, // host-missing validation error, return failure. if (p.atSignSeen && p.buffer.isEmpty()) { p.failure("Missing host."); } // Decrease pointer by buffer’s code point length + 1, // set buffer to the empty string, and set state to host state. p.pointer -= p.buffer.length() + 1; p.emptyBuffer(); p.setState(HOST); } // Otherwise, append c to buffer. else { p.append(c); } } }, HOST { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If state override is given and url’s scheme is "file", // then decrease pointer by 1 and set state to file host state. if (p.stateOverride != null && "file".equals(url.scheme())) { p.pointer--; p.setState(FILE_HOST); } // Otherwise, if c is U+003A (:) and insideBrackets is false, then: else if (c == ':' && !p.insideBrackets) { // If buffer is the empty string, host-missing validation error, return failure. if (p.buffer.isEmpty()) { p.failure("Missing host."); } // If state override is given and state override is hostname state, then return. if (p.stateOverride == HOST) { p.stopMainLoop = true; return; } // Let host be the result of host parsing buffer with url is not special. // Set url’s host to host, buffer to the empty string, and state to port state. url.host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); p.emptyBuffer(); p.setState(PORT); } // Otherwise, if one of the following is true: // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) // - url is special and c is U+005C (\) else if ( (c == EOF || c == '/' || c == '?' || c == '#') || (url.isSpecial() && c == '\\')) { // then decrease pointer by 1, and then: p.pointer--; // If url is special and buffer is the empty string, // host-missing validation error, return failure. if (url.isSpecial() && p.buffer.isEmpty()) { p.failure("The input has a special scheme, but does not contain a host."); } // Otherwise, if state override is given, buffer is the empty string, // and either url includes credentials or url’s port is non-null, return. else if (p.stateOverride != null && p.buffer.isEmpty() && (url.includesCredentials() || url.port() != null )) { p.stopMainLoop = true; return; } // EXTRA: if buffer is not empty if (!p.buffer.isEmpty()) { // Let host be the result of host parsing buffer with url is not special. // Set url’s host to host, buffer to the empty string, and state to path start state. url.host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); } else { url.host = EmptyHost.INSTANCE; } p.emptyBuffer(); p.setState(PATH_START); // If state override is given, then return. if (p.stateOverride != null) { p.stopMainLoop = true; } } // Otherwise: else { // If c is U+005B ([), then set insideBrackets to true. if (c == '[') { p.insideBrackets = true; } // If c is U+005D (]), then set insideBrackets to false. else if (c == ']') { p.insideBrackets = false; } // Append c to buffer. p.append(c); } } }, PORT { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is an ASCII digit, append c to buffer. if (isAsciiDigit(c)) { p.append(c); } // Otherwise, if one of the following is true: // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) // - url is special and c is U+005C (\) // - state override is given else if (c == EOF || c == '/' || c == '?' || c == '#' || (url.isSpecial() && c == '\\') || (p.stateOverride != null)) { // If buffer is not the empty string, then: if (!p.buffer.isEmpty()) { // EXTRA: if buffer contains only ASCII digits, then if (containsOnlyAsciiDigits(p.buffer)) { try { // Let port be the mathematical integer value that is represented // by buffer in radix-10 using ASCII digits for digits with values 0 through 9. int port = Integer.parseInt(p.buffer, 0, p.buffer.length(), 10); // If port is greater than 2^16 − 1, // port-out-of-range validation error, return failure. if (port > MAX_PORT) { p.failure("Port \"" + port + "\" is out of range"); } int defaultPort = defaultPort(url.scheme); // Set url’s port to null, if port is url’s scheme’s default port; otherwise to port. if (defaultPort != -1 && port == defaultPort) { url.port = null; } else { url.port = new IntPort(port); } } catch (NumberFormatException ex) { p.failure(ex.getMessage()); } } // EXTRA: otherwise, set url's port to buffer else { url.port = new StringPort(p.buffer.toString()); } // Set buffer to the empty string. p.emptyBuffer(); } // If state override is given, then return. if (p.stateOverride != null) { p.stopMainLoop = true; return; } // Set state to path start state and decrease pointer by 1. p.setState(PATH_START); p.pointer--; } // EXTRA: if c is within URI variable, keep appending else if (p.processCurlyBrackets(c)) { p.append(c); } // Otherwise, port-invalid validation error, return failure. else { p.failure("Invalid port: \"" + Character.toString(c) + "\""); } } }, FILE { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // Set url’s scheme to "file". url.scheme = "file"; // Set url’s host to the empty string. url.host = EmptyHost.INSTANCE; // If c is U+002F (/) or U+005C (\), then: if (c == '/' || c == '\\') { // If c is U+005C (\), invalid-reverse-solidus validation error. if (p.validate() && c == '\\') { p.validationError("URL uses \\ instead of /."); } // Set state to file slash state. p.setState(FILE_SLASH); } // Otherwise, if base is non-null and base’s scheme is "file": else if (p.base != null && p.base.scheme().equals("file")) { // Set url’s host to base’s host, url’s path to a clone of base’s path, // and url’s query to base’s query. url.host = p.base.host; url.path = p.base.path().clone(); url.query = p.base.query; // If c is U+003F (?), then set url’s query to the empty string and state to query state. if (c == '?') { url.query = new StringBuilder(); p.setState(QUERY); } // Otherwise, if c is U+0023 (#), set url’s fragment to // the empty string and state to fragment state. else if (c == '#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } // Otherwise, if c is not the EOF code point: else if (c != EOF) { // Set url’s query to null. url.query = null; // If the code point substring from pointer to the end of input does not start with // a Windows drive letter, then shorten url’s path. String substring = p.input.substring(p.pointer); if (!startsWithWindowsDriveLetter(substring)) { url.shortenPath(); } // Otherwise: else { // File-invalid-Windows-drive-letter validation error. if (p.validate()) { p.validationError("The input is a relative-URL string that starts with " + "a Windows drive letter and the base URL’s scheme is \"file\"."); } // Set url’s path to « ». url.path = new PathSegments(); } // Set state to path state and decrease pointer by 1. p.setState(PATH); p.pointer--; } } // Otherwise, set state to path state, and decrease pointer by 1. else { p.setState(PATH); p.pointer--; } } }, FILE_SLASH { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+002F (/) or U+005C (\), then: if (c == '/' || c == '\\') { // If c is U+005C (\), invalid-reverse-solidus validation error. if (p.validate() && c == '\\') { p.validationError("URL uses \\ instead of /."); } // Set state to file host state. p.setState(FILE_HOST); } // Otherwise: else { // If base is non-null and base’s scheme is "file", then: if (p.base != null && p.base.scheme.equals("file")) { // Set url’s host to base’s host. url.host = p.base.host; // If the code point substring from pointer to the end of input does not start with // a Windows drive letter and base’s path[0] is a normalized Windows drive letter, // then append base’s path[0] to url’s path. String substring = p.input.substring(p.pointer); if (!startsWithWindowsDriveLetter(substring) && p.base.path instanceof PathSegments basePath && !basePath.isEmpty() && isWindowsDriveLetter(basePath.get(0), true)) { url.path.append(basePath.get(0)); } } // Set state to path state, and decrease pointer by 1. p.setState(PATH); p.pointer--; } } }, FILE_HOST { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), // then decrease pointer by 1 and then: if (c == EOF || c == '/' || c == '\\' || c == '?' || c == '#') { p.pointer--; // If state override is not given and buffer is a Windows drive letter, // file-invalid-Windows-drive-letter-host validation error, set state to path state. if (p.stateOverride == null && isWindowsDriveLetter(p.buffer, false)) { p.validationError("A file: URL’s host is a Windows drive letter."); p.setState(PATH); } // Otherwise, if buffer is the empty string, then: else if (p.buffer.isEmpty()) { // Set url’s host to the empty string. url.host = EmptyHost.INSTANCE; // If state override is given, then return. if (p.stateOverride != null) { p.stopMainLoop = true; return; } // Set state to path start state. p.setState(PATH_START); } // Otherwise, run these steps: else { // Let host be the result of host parsing buffer with url is not special. Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); // If host is "localhost", then set host to the empty string. if (host instanceof Domain domain && domain.domain().equals("localhost")) { host = EmptyHost.INSTANCE; } // Set url’s host to host. url.host = host; // If state override is given, then return. if (p.stateOverride != null) { p.stopMainLoop = true; return; } // Set buffer to the empty string and state to path start state. p.emptyBuffer(); p.setState(PATH_START); } } // Otherwise, append c to buffer. else { p.append(c); } } }, PATH_START { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If url is special, then: if (url.isSpecial()) { // If c is U+005C (\), invalid-reverse-solidus validation error. if (p.validate() && c == '\\') { p.validationError("URL uses \"\\\" instead of \"/\""); } // Set state to path state. p.setState(PATH); // If c is neither U+002F (/) nor U+005C (\), then decrease pointer by 1. if (c != '/' && c != '\\') { p.pointer--; } else { p.append('/'); } } // Otherwise, if state override is not given and if c is U+003F (?), // set url’s query to the empty string and state to query state. else if (p.stateOverride == null && c == '?') { url.query = new StringBuilder(); p.setState(QUERY); } // Otherwise, if state override is not given and if c is U+0023 (#), // set url’s fragment to the empty string and state to fragment state. else if (p.stateOverride == null && c =='#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } // Otherwise, if c is not the EOF code point: else if (c != EOF) { // Set state to path state. p.setState(PATH); // If c is not U+002F (/), then decrease pointer by 1. if (c != '/') { p.pointer--; } // EXTRA: otherwise append '/' to let the path segment start with / else { p.append('/'); } } // Otherwise, if state override is given and url’s host is null, // append the empty string to url’s path. else if (p.stateOverride != null && url.host() == null) { url.path().append(""); } } }, PATH { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If one of the following is true: // - c is the EOF code point or U+002F (/) // - url is special and c is U+005C (\) // - state override is not given and c is U+003F (?) or U+0023 (#) // then: if (c == EOF || c == '/' || (url.isSpecial() && c == '\\') || (p.stateOverride == null && (c == '?' || c == '#'))) { // If url is special and c is U+005C (\), invalid-reverse-solidus validation error. if (p.validate() && url.isSpecial() && c == '\\') { p.validationError("URL uses \"\\\" instead of \"/\""); } // If buffer is a double-dot URL path segment, then: if (isDoubleDotPathSegment(p.buffer)) { // Shorten url’s path. url.shortenPath(); // If neither c is U+002F (/), nor url is special and c is U+005C (\), // append the empty string to url’s path. if (c != '/' && !(url.isSpecial() && c == '\\')) { url.path.append(""); } } else { boolean singlePathSegment = isSingleDotPathSegment(p.buffer); // Otherwise, if buffer is a single-dot URL path segment and if neither c is U+002F (/), // nor url is special and c is U+005C (\), append the empty string to url’s path. if (singlePathSegment && c != '/' && !(url.isSpecial() && c == '\\')) { url.path.append(""); } // Otherwise, if buffer is not a single-dot URL path segment, then: else if (!singlePathSegment) { // If url’s scheme is "file", url’s path is empty, and buffer is // a Windows drive letter, then replace the second code point in buffer with U+003A (:). if ("file".equals(url.scheme) && url.path.isEmpty() && isWindowsDriveLetter(p.buffer, false)) { p.buffer.setCharAt(1, ':'); } // Append buffer to url’s path. url.path.append(p.buffer.toString()); } } // Set buffer to the empty string. p.emptyBuffer(); if ( c == '/' || url.isSpecial() && c == '\\') { p.append('/'); } // If c is U+003F (?), then set url’s query to the empty string and state to query state. if (c == '?') { url.query = new StringBuilder(); p.setState(QUERY); } // If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. if (c == '#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } } // Otherwise, run these steps: else { if (p.validate()) { // If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error. if (!isUrlCodePoint(c) && c != '%') { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, // invalid-URL-unit validation error. else if (c == '%' && (p.pointer >= p.input.length() - 2 || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } } // UTF-8 percent-encode c using the path percent-encode set and append the result to buffer. String encoded = p.percentEncode(c, WhatWgUrlParser::pathPercentEncodeSet); if (encoded != null) { p.append(encoded); } else { p.append(c); } } } }, OPAQUE_PATH { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is U+003F (?), then set url’s query to the empty string and state to query state. if (c == '?') { url.query = new StringBuilder(); p.setState(QUERY); } // Otherwise, if c is U+0023 (#), then set url’s fragment to // the empty string and state to fragment state. else if (c == '#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } // Otherwise: else { if (p.validate()) { // If c is not the EOF code point, not a URL code point, and not U+0025 (%), // invalid-URL-unit validation error. if (c != EOF && !isUrlCodePoint(c) && c != '%') { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, // invalid-URL-unit validation error. else if (c == '%' && (p.pointer >= p.input.length() - 2 || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } } // If c is not the EOF code point, UTF-8 percent-encode c using // the C0 control percent-encode set and append the result to url’s path. if (c != EOF) { String encoded = p.percentEncode(c, WhatWgUrlParser::c0ControlPercentEncodeSet); if (encoded != null) { url.path.append(encoded); } else { url.path.append(c); } } } } }, QUERY { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If encoding is not UTF-8 and one of the following is true: // - url is not special // - url’s scheme is "ws" or "wss" // then set encoding to UTF-8. if (p.encoding != null && !StandardCharsets.UTF_8.equals(p.encoding) && (!url.isSpecial() || "ws".equals(url.scheme) || "wss".equals(url.scheme))) { p.encoding = StandardCharsets.UTF_8; } // If one of the following is true: // - state override is not given and c is U+0023 (#) // - c is the EOF code point if ( (p.stateOverride == null && c == '#') || c == EOF) { // Let queryPercentEncodeSet be the special-query percent-encode set if url is special; // otherwise the query percent-encode set. IntPredicate queryPercentEncodeSet = (url.isSpecial() ? WhatWgUrlParser::specialQueryPercentEncodeSet : WhatWgUrlParser::queryPercentEncodeSet); // Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, // and append the result to url’s query. String encoded = p.percentEncode(p.buffer.toString(), queryPercentEncodeSet); Assert.state(url.query != null, "Url's query should not be null"); url.query.append(encoded); // Set buffer to the empty string. p.emptyBuffer(); // If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. if (c == '#') { url.fragment = new StringBuilder(); p.setState(FRAGMENT); } } // Otherwise, if c is not the EOF code point: else { if (p.validate()) { // If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error. if (!isUrlCodePoint(c) && c != '%') { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, // invalid-URL-unit validation error. else if (c == '%' && (p.pointer >= p.input.length() - 2 || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } } // Append c to buffer. p.append(c); } } }, FRAGMENT { @Override public void handle(int c, UrlRecord url, WhatWgUrlParser p) { // If c is not the EOF code point, then: if (c != EOF) { if (p.validate()) { // If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error. if (!isUrlCodePoint(c) && c != '%') { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } // If c is U+0025 (%) and remaining does not start with two ASCII hex digits, // invalid-URL-unit validation error. else if (c == '%' && (p.pointer >= p.input.length() - 2 || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 1)) || !isAsciiHexDigit(p.input.codePointAt(p.pointer + 2)))) { p.validationError("Invalid URL Unit: \"" + (char) c + "\""); } } // UTF-8 percent-encode c using the fragment percent-encode set and // append the result to url’s fragment. String encoded = p.percentEncode(c, WhatWgUrlParser::fragmentPercentEncodeSet); Assert.state(url.fragment != null, "Url's fragment should not be null"); if (encoded != null) { url.fragment.append(encoded); } else { url.fragment.appendCodePoint(c); } } } }; public abstract void handle(int c, UrlRecord url, WhatWgUrlParser p); } /** * A URL is a struct that represents a universal identifier. * To disambiguate from a valid URL string it can also be referred to as a * URL record. */ static final class UrlRecord { private String scheme = ""; @Nullable private StringBuilder username = null; @Nullable private StringBuilder password = null; @Nullable private Host host = null; @Nullable private Port port = null; private Path path = new PathSegments(); @Nullable private StringBuilder query = null; @Nullable private StringBuilder fragment = null; public UrlRecord() { } /** * A URL is special if its scheme is a special scheme. A URL is not special if its scheme is not a special scheme. */ public boolean isSpecial() { return isSpecialScheme(this.scheme); } /** * A URL includes credentials if its username or password is not the empty string. */ public boolean includesCredentials() { return (this.username != null && !this.username.isEmpty() || this.password != null && !this.password.isEmpty()); } /** * A URL has an opaque path if its path is a URL path segment. */ public boolean hasOpaquePath() { return path().isOpaque(); } /** * A URL’s scheme is an ASCII string that identifies the type of URL and * can be used to dispatch a URL for further processing after parsing. * It is initially the empty string. */ public String scheme() { return this.scheme; } /** * The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:). */ @SuppressWarnings("unused") public String protocol() { return scheme() + ":"; } /** * A URL’s username is an ASCII string identifying a username. * It is initially the empty string. */ public String username() { return (this.username != null ? this.username.toString() : ""); } void appendToUsername(int codePoint) { if (this.username == null) { this.username = new StringBuilder(2); } this.username.appendCodePoint(codePoint); } public void appendToUsername(String s) { if (this.username == null) { this.username = new StringBuilder(s); } else { this.username.append(s); } } /** * A URL’s password is an ASCII string identifying a password. It is initially the empty string. */ public String password() { return (this.password != null ? this.password.toString() : ""); } void appendToPassword(int codePoint) { if (this.password == null) { this.password = new StringBuilder(2); } this.password.appendCodePoint(codePoint); } void appendToPassword(String s) { if (this.password == null) { this.password = new StringBuilder(s); } else { this.password.append(s); } } /** * Convenience method to return the full user info. */ @Nullable public String userInfo() { if (!includesCredentials()) { return null; } StringBuilder userInfo = new StringBuilder(username()); if (!password().isEmpty()) { userInfo.append(':'); userInfo.append(password()); } return userInfo.toString(); } /** * A URL’s host is {@code null} or a {@linkplain Host host}. * It is initially {@code null}. */ @Nullable public Host host() { return this.host; } /** *The host getter steps are: *
    *
  1. Let url be this URL. *
  2. If url’s host is null, then return the empty string. *
  3. If url’s port is null, return url’s host, serialized. *
  4. Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. *
*/ @SuppressWarnings("unused") public String hostString() { if (host() == null) { return ""; } StringBuilder builder = new StringBuilder(hostname()); Port port = port(); if (port != null) { builder.append(':'); builder.append(port); } return builder.toString(); } public String hostname() { Host host = host(); return (host != null ? host.toString() : ""); } /** * A URL’s port is either null, a string representing a 16-bit unsigned * integer, or a string containing a uri template. *

It is initially {@code null}. */ @Nullable public Port port() { return this.port; } public String portString() { return (port() != null ? port().toString() : ""); } /** * A URL’s path is a URL {@linkplain Path path}, usually identifying a location. *

It is initially {@code « »}. */ public Path path() { return this.path; } public String pathname() { return path().name(); } /** * To shorten a url’s path: *

    *
  1. Assert: url does not have an opaque path.
  2. *
  3. Let path be url’s path.
  4. *
  5. If url’s scheme is "file", path’s size is 1, and path[0] is a * normalized Windows drive letter, then return.
  6. *
  7. Remove path’s last item, if any.
  8. *
*/ public void shortenPath() { this.path.shorten(this.scheme); } /** * A URL’s query is either {@code null} or an ASCII string. *

It is initially {@code null}. */ @Nullable public String query() { return (this.query != null ? this.query.toString() : null); } /** * The search getter steps are: *

    *
  1. If this URL’s query is either null or the empty string, then return the empty string. *
  2. Return U+003F (?), followed by this URL’s query. *
*/ public String search() { String query = query(); if (query == null) { return ""; } else { return "?" + query; } } /** * A URL’s fragment is either {@code null} or an ASCII string * that can be used for further processing on the resource the URL’s * other components identify. *

It is initially {@code null}. */ @Nullable public String fragment() { return (this.fragment != null ? this.fragment.toString() : null); } /** * The hash getter steps are: *

    *
  1. If this URL’s fragment is either null or the empty string, then return the empty string. *
  2. Return U+0023 (#), followed by this’s URL’s fragment. *
*/ @SuppressWarnings("unused") public String hash() { String fragment = fragment(); return (fragment != null && !fragment.isEmpty() ? "#" + fragment : ""); } public String href() { // Let output be url’s scheme and U+003A (:) concatenated. StringBuilder output = new StringBuilder(scheme()); output.append(':'); Host host = host(); // If url’s host is non-null: if (host != null) { // Append "//" to output. output.append("//"); // If url includes credentials, then: if (includesCredentials()) { // Append url’s username to output. output.append(username()); String password = password(); // If url’s password is not the empty string, then append U+003A (:), // followed by url’s password, to output. if (!password.isEmpty()) { output.append(':'); output.append(password); } // Append U+0040 (@) to output. output.append('@'); } // Append url’s host, serialized, to output. output.append(hostname()); Port port = port(); // If url’s port is non-null, append U+003A (:) followed by url’s port, serialized, to output. if (port != null) { output.append(':'); output.append(port()); } } // If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, // and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output. else if (!hasOpaquePath() && path() instanceof PathSegments pathSegments && pathSegments.size() > 1 && pathSegments.get(0).isEmpty()) { output.append("/."); } // Append the result of URL path serializing url to output. output.append(pathname()); // If url’s query is non-null, append U+003F (?), followed by url’s query, to output. String query = query(); if (query != null) { output.append('?'); output.append(query); } // If exclude fragment is false and url’s fragment is non-null, then append U+0023 (#), // followed by url’s fragment, to output. String fragment = fragment(); if (fragment != null) { output.append('#'); output.append(fragment); } // Return output. return output.toString(); } @Override public boolean equals(Object obj) { if (obj == this) { return true; } if (obj == null || obj.getClass() != this.getClass()) { return false; } UrlRecord that = (UrlRecord) obj; return Objects.equals(this.scheme(), that.scheme()) && Objects.equals(this.username(), that.username()) && Objects.equals(this.password(), that.password()) && Objects.equals(this.host(), that.host()) && Objects.equals(this.port(), that.port()) && Objects.equals(this.path(), that.path()) && Objects.equals(this.query(), that.query()) && Objects.equals(this.fragment(), that.fragment()); } @Override public int hashCode() { return Objects.hash( this.scheme, this.username, this.password, this.host, this.port, this.path, this.query, this.fragment); } @Override public String toString() { return "UrlRecord[" + "scheme=" + this.scheme + ", " + "username=" + this.username + ", " + "password=" + this.password + ", " + "host=" + this.host + ", " + "port=" + this.port + ", " + "path=" + this.path + ", " + "query=" + this.query + ", " + "fragment=" + this.fragment + ']'; } } /** * A host is a domain, an IP address, an opaque host, or an empty host. * Typically, a host serves as a network address, but it is sometimes used as * opaque identifier in URLs where a network address is not necessary. */ sealed interface Host permits Domain, EmptyHost, IpAddressHost, OpaqueHost { /** * The host parser takes a scalar value string input with an optional * boolean isOpaque (default false), and then runs these steps. * They return failure or a host. */ static Host parse(String input, boolean isOpaque, WhatWgUrlParser p) { // If input starts with U+005B ([), then: if (!input.isEmpty() && input.codePointAt(0) == '[') { int last = input.length() - 1; // If input does not end with U+005D (]), IPv6-unclosed validation error, return failure. if (input.codePointAt(last) != ']') { throw new InvalidUrlException("IPv6 address is missing the closing \"]\")."); } // Return the result of IPv6 parsing input // with its leading U+005B ([) and trailing U+005D (]) removed. String ipv6Host = input.substring(1, last); return new IpAddressHost(Ipv6Address.parse(ipv6Host)); } // If isOpaque is true, then return the result of opaque-host parsing input. if (isOpaque) { return OpaqueHost.parse(input, p); } // Assert: input is not the empty string. Assert.state(!input.isEmpty(), "Input should not be empty"); // Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input. String domain = percentDecode(input); // Let asciiDomain be the result of running domain to ASCII with domain and false. String asciiDomain = domainToAscii(domain, false); for (int i=0; i < asciiDomain.length(); i++) { int ch = asciiDomain.codePointAt(i); // If asciiDomain contains a forbidden domain code point, // domain-invalid-code-point validation error, return failure. if (isForbiddenDomain(ch)) { throw new InvalidUrlException("Invalid character \"" + ch + "\" in domain \"" + input + "\""); } } // If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain. if (endsInNumber(asciiDomain)) { Ipv4Address address = Ipv4Address.parse(asciiDomain, p); return new IpAddressHost(address); } // Return asciiDomain. else { return new Domain(asciiDomain); } } private static boolean endsInNumber(String input) { // Let parts be the result of strictly splitting input on U+002E (.). LinkedList parts = strictSplit(input, '.'); if (parts.isEmpty()) { return false; } // If the last item in parts is the empty string, then: if (parts.getLast().isEmpty()) { // If parts’s size is 1, then return false. if (parts.size() == 1) { return false; } // Remove the last item from parts. parts.removeLast(); } // Let last be the last item in parts. String last = parts.getLast(); // If last is non-empty and contains only ASCII digits, then return true. if (!last.isEmpty() && containsOnlyAsciiDigits(last)) { return true; } // If parsing last as an IPv4 number does not return failure, then return true. ParseIpv4NumberResult result = Ipv4Address.parseIpv4Number(last); return result != ParseIpv4NumberFailure.INSTANCE; } } /** * A domain is a non-empty ASCII string that identifies a realm within a network. [RFC1034]. */ static final class Domain implements Host { private final String domain; Domain(String domain) { this.domain = domain; } public String domain() { return this.domain; } @Override public boolean equals(Object o) { if (o == this) { return true; } else if (o instanceof Domain other) { return this.domain.equals(other.domain); } else { return false; } } @Override public int hashCode() { return this.domain.hashCode(); } @Override public String toString() { return this.domain; } } static final class IpAddressHost implements Host { private final IpAddress address; private final String addressString; IpAddressHost(IpAddress address) { this.address = address; if (address instanceof Ipv6Address) { this.addressString = "[" + address + "]"; } else { this.addressString = address.toString(); } } @SuppressWarnings("unused") public IpAddress address() { return this.address; } @Override public boolean equals(Object obj) { if (obj == this) { return true; } else if (obj instanceof IpAddressHost other) { return this.address.equals(other.address); } else { return false; } } @Override public int hashCode() { return this.address.hashCode(); } @Override public String toString() { return this.addressString; } } static final class OpaqueHost implements Host { private final String host; private OpaqueHost(String host) { this.host = host; } /** * The opaque-host parser takes a scalar value string input, * and then runs these steps. They return failure or an opaque host. */ public static OpaqueHost parse(String input, WhatWgUrlParser p) { for (int i = 0; i < input.length(); i++) { int ch = input.codePointAt(i); // If input contains a forbidden host code point, h // ost-invalid-code-point validation error, return failure. if (isForbiddenHost(ch)) { throw new InvalidUrlException("An opaque host contains a forbidden host code point."); } // If input contains a code point that is not a URL code point and not U+0025 (%), // invalid-URL-unit validation error. if (p.validate() && !isUrlCodePoint(ch) && ch != '%') { p.validationError("Code point \"" + ch + "\" is not a URL unit."); } // If input contains a U+0025 (%) and the two code points following it // are not ASCII hex digits, invalid-URL-unit validation error. if (p.validate() && ch == '%' && (input.length() - i < 2 || !isAsciiDigit(input.codePointAt(i + 1)) || !isAsciiDigit(input.codePointAt(i + 2)))) { p.validationError("Code point \"" + ch + "\" is not a URL unit."); } } // Return the result of running UTF-8 percent-encode on input // using the C0 control percent-encode set. String encoded = p.percentEncode(input, WhatWgUrlParser::c0ControlPercentEncodeSet); return new OpaqueHost(encoded); } @Override public boolean equals(Object obj) { if (obj == this) { return true; } else if (obj instanceof OpaqueHost other) { return this.host.equals(other.host); } else { return false; } } @Override public int hashCode() { return this.host.hashCode(); } @Override public String toString() { return this.host; } } static final class EmptyHost implements Host { static final EmptyHost INSTANCE = new EmptyHost(); private EmptyHost() { } @Override public boolean equals(Object obj) { return obj == this || obj != null && obj.getClass() == this.getClass(); } @Override public int hashCode() { return 1; } @Override public String toString() { return ""; } } sealed interface IpAddress permits Ipv4Address, Ipv6Address { } static final class Ipv4Address implements IpAddress { private final int address; private final String string; Ipv4Address(int address) { this.address = address; this.string = serialize(address); } /** * The IPv4 serializer takes an IPv4 address {@code address} and then runs these steps. * They return an ASCII string. */ private static String serialize(int address) { //Let output be the empty string. StringBuilder output = new StringBuilder(); //Let n be the value of address. int n = address; //For each i in the range 1 to 4, inclusive: for (int i = 1; i <= 4; i++) { // Prepend n % 256, serialized, to output. output.insert(0, Integer.toUnsignedString(Integer.remainderUnsigned(n, 256))); //If i is not 4, then prepend U+002E (.) to output. if (i != 4) { output.insert(0, '.'); } //Set n to floor(n / 256). n = Math.floorDiv(n, 256); } //Return output. return output.toString(); } public static Ipv4Address parse(String input, WhatWgUrlParser p) { // Let parts be the result of strictly splitting input on U+002E (.). List parts = strictSplit(input, '.'); int partsSize = parts.size(); // If the last item in parts is the empty string, then: if (parts.get(partsSize - 1).isEmpty()) { // IPv4-empty-part validation error. p.validationError("IPv4 address ends with \".\""); // If parts’s size is greater than 1, then remove the last item from parts. if (partsSize > 1) { parts.remove(partsSize - 1); partsSize--; } } // If parts’s size is greater than 4, IPv4-too-many-parts validation error, return failure. if (partsSize > 4) { throw new InvalidUrlException("IPv4 address does not consist of exactly 4 parts."); } // Let numbers be an empty list. List numbers = new ArrayList<>(partsSize); // For each part of parts: for (int i = 0; i < partsSize; i++) { String part = parts.get(i); // Let result be the result of parsing part. ParseIpv4NumberResult result = parseIpv4Number(part); // If result is failure, IPv4-non-numeric-part validation error, return failure. if (result == ParseIpv4NumberFailure.INSTANCE) { p.failure("An IPv4 address part is not numeric."); } else { ParseIpv4NumberSuccess success = (ParseIpv4NumberSuccess) result; if (p.validate() && success.validationError()) { p.validationError( "The IPv4 address contains numbers expressed using hexadecimal or octal digits."); } // Append result to numbers. numbers.add(success.number()); } } for (Iterator iterator = numbers.iterator(); iterator.hasNext(); ) { Integer number = iterator.next(); // If any item in numbers is greater than 255, IPv4-out-of-range-part validation error. if (p.validate() && number > 255) { p.validationError("An IPv4 address part exceeds 255."); } if (iterator.hasNext()) { // If any but the last item in numbers is greater than 255, then return failure. if (number > 255) { throw new InvalidUrlException("An IPv4 address part exceeds 255."); } } else { // If the last item in numbers is greater than or equal to 256^(5 − numbers’s size), // then return failure. double limit = Math.pow(256, (5 - numbers.size())); if (number >= limit) { throw new InvalidUrlException( "IPv4 address part " + number + " exceeds " + limit + ".'"); } } } // Let ipv4 be the last item in numbers. int ipv4 = numbers.get(numbers.size() - 1); // Remove the last item from numbers. numbers.remove(numbers.size() - 1); // Let counter be 0. int counter = 0; // For each n of numbers: for (Integer n : numbers) { // Increment ipv4 by n × 256^(3 − counter). int increment = n * (int) Math.pow(256, 3 - counter); ipv4 += increment; // Increment counter by 1. counter++; } // Return ipv4. return new Ipv4Address(ipv4); } /** * The IPv4 number parser takes an ASCII string input and then runs these steps. * They return failure or a tuple of a number and a boolean. */ private static ParseIpv4NumberResult parseIpv4Number(String input) { // If input is the empty string, then return failure. if (input.isEmpty()) { return ParseIpv4NumberFailure.INSTANCE; } // Let validationError be false. boolean validationError = false; // Let R be 10. int r = 10; int len = input.length(); // If input contains at least two code points and // the first two code points are either "0X" or "0x", then: if (len >= 2) { int ch0 = input.codePointAt(0); int ch1 = input.codePointAt(1); if (ch0 == '0' && (ch1 == 'X' || ch1 == 'x')) { // Set validationError to true. validationError = true; // Remove the first two code points from input. input = input.substring(2); // Set R to 16. r = 16; } // Otherwise, if input contains at least two code points and // the first code point is U+0030 (0), then: else if (ch0 == '0') { // Set validationError to true. validationError = true; // Remove the first code point from input. input = input.substring(1); // Set R to 8. r = 8; } } // If input is the empty string, then return (0, true). if (input.isEmpty()) { return new ParseIpv4NumberSuccess(0, true); } // If input contains a code point that is not a radix-R digit, then return failure. for (int i = 0; i < input.length(); i++) { int c = input.codePointAt(i); int digit = Character.digit(c, r); if (digit == -1) { return ParseIpv4NumberFailure.INSTANCE; } } try { // Let output be the mathematical integer value that is represented by // input in radix-R notation, using ASCII hex digits for digits with values 0 through 15. int output = Integer.parseInt(input, r); // Return (output, validationError). return new ParseIpv4NumberSuccess(output, validationError); } catch (NumberFormatException ex) { return ParseIpv4NumberFailure.INSTANCE; } } @Override public boolean equals(Object o) { if (o == this) { return true; } else if (o instanceof Ipv4Address other) { return this.address == other.address; } else { return false; } } @Override public int hashCode() { return this.address; } @Override public String toString() { return this.string; } } static final class Ipv6Address implements IpAddress { private final int[] pieces; private final String string; private Ipv6Address(int[] pieces) { Assert.state(pieces.length == 8, "Invalid amount of IPv6 pieces"); this.pieces = pieces; this.string = serialize(pieces); } /** * The IPv6 parser takes a scalar value string input and then runs these steps. * They return failure or an IPv6 address. */ public static Ipv6Address parse(String input) { // Let address be a new IPv6 address whose IPv6 pieces are all 0. int[] address = new int[8]; // Let pieceIndex be 0. int pieceIndex = 0; // Let compress be null. Integer compress = null; // Let pointer be a pointer for input. int pointer = 0; int inputLength = input.length(); int c = (inputLength > 0) ? input.codePointAt(0) : EOF; // If c is U+003A (:), then: if (c == ':') { // If remaining does not start with U+003A (:), // IPv6-invalid-compression validation error, return failure. if (inputLength > 1 && input.codePointAt(1) != ':') { throw new InvalidUrlException("IPv6 address begins with improper compression."); } // Increase pointer by 2. pointer += 2; // Increase pieceIndex by 1 and then set compress to pieceIndex. pieceIndex++; compress = pieceIndex; } c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; // While c is not the EOF code point: while (c != EOF) { // If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure. if (pieceIndex == 8) { throw new InvalidUrlException("IPv6 address contains more than 8 pieces."); } // If c is U+003A (:), then: if (c == ':') { // If compress is non-null, IPv6-multiple-compression validation error, return failure. if (compress != null) { throw new InvalidUrlException("IPv6 address is compressed in more than one spot."); } // Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue. pointer++; pieceIndex++; compress = pieceIndex; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; continue; } // Let value and length be 0. int value = 0; int length = 0; // While length is less than 4 and c is an ASCII hex digit, // set value to value × 0x10 + c interpreted as hexadecimal number, // and increase pointer and length by 1. while (length < 4 && isAsciiHexDigit(c)) { int cHex = Character.digit(c, 16); value = (value * 0x10) + cHex; pointer++; length++; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; } // If c is U+002E (.), then: if (c == '.') { // If length is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure. if (length == 0) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv4 part is empty."); } // Decrease pointer by length. pointer -= length; // If pieceIndex is greater than 6, // IPv4-in-IPv6-too-many-pieces validation error, return failure. if (pieceIndex > 6) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv6 address has more than 6 pieces."); } // Let numbersSeen be 0. int numbersSeen = 0; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; // While c is not the EOF code point: while (c != EOF) { // Let ipv4Piece be null. Integer ipv4Piece = null; // If numbersSeen is greater than 0, then: if (numbersSeen > 0) { // If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1. if (c =='.' && numbersSeen < 4) { pointer++; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; } // Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure. else { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: " + "IPv4 part is empty or contains a non-ASCII digit."); } } // If c is not an ASCII digit, // IPv4-in-IPv6-invalid-code-point validation error, return failure. if (!isAsciiDigit(c)) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv4 part contains a non-ASCII digit."); } // While c is an ASCII digit: while (isAsciiDigit(c)) { // Let number be c interpreted as decimal number. int number = Character.digit(c, 10); // If ipv4Piece is null, then set ipv4Piece to number. if (ipv4Piece == null) { ipv4Piece = number; } // Otherwise, if ipv4Piece is 0, // IPv4-in-IPv6-invalid-code-point validation error, return failure. else if (ipv4Piece == 0) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv4 part contains a non-ASCII digit."); } // Otherwise, set ipv4Piece to ipv4Piece × 10 + number. else { ipv4Piece = ipv4Piece * 10 + number; } // If ipv4Piece is greater than 255, // IPv4-in-IPv6-out-of-range-part validation error, return failure. if (ipv4Piece > 255) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv4 part exceeds 255."); } // Increase pointer by 1. pointer++; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; } // Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece. address[pieceIndex] = address[pieceIndex] * 0x100 + (ipv4Piece != null ? ipv4Piece : 0); // Increase numbersSeen by 1. numbersSeen++; // If numbersSeen is 2 or 4, then increase pieceIndex by 1. if (numbersSeen == 2 || numbersSeen == 4) { pieceIndex++; } c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; } // If numbersSeen is not 4, // IPv4-in-IPv6-too-few-parts validation error, return failure. if (numbersSeen != 4) { throw new InvalidUrlException( "IPv6 address with IPv4 address syntax: IPv4 address contains too few parts."); } // Break. break; } // Otherwise, if c is U+003A (:): else if (c == ':') { // Increase pointer by 1. pointer++; c = (pointer < inputLength) ? input.codePointAt(pointer) : EOF; // If c is the EOF code point, IPv6-invalid-code-point validation error, return failure. if (c == EOF) { throw new InvalidUrlException("IPv6 address unexpectedly ends."); } } // Otherwise, if c is not the EOF code point, // IPv6-invalid-code-point validation error, return failure. else if (c != EOF) { throw new InvalidUrlException( "IPv6 address contains \"" + Character.toString(c) + "\", which is " + "neither an ASCII hex digit nor a ':'."); } // Set address[pieceIndex] to value. address[pieceIndex] = value; // Increase pieceIndex by 1. pieceIndex++; } // If compress is non-null, then: if (compress != null) { // Let swaps be pieceIndex − compress. int swaps = pieceIndex - compress; // Set pieceIndex to 7. pieceIndex = 7; // While pieceIndex is not 0 and swaps is greater than 0, // swap address[pieceIndex] with address[compress + swaps − 1], and // then decrease both pieceIndex and swaps by 1. while (pieceIndex != 0 && swaps > 0) { int tmp = address[pieceIndex]; address[pieceIndex] = address[compress + swaps - 1]; address[compress + swaps - 1] = tmp; pieceIndex--; swaps--; } } // Otherwise, if compress is null and pieceIndex is not 8, // IPv6-too-few-pieces validation error, return failure. else if (pieceIndex != 8) { throw new InvalidUrlException("An uncompressed IPv6 address contains fewer than 8 pieces."); } // Return address. return new Ipv6Address(address); } /** * The IPv6 serializer takes an IPv6 address {@code address} and * then runs these steps. They return an ASCII string. */ private static String serialize(int[] address) { // Let output be the empty string. StringBuilder output = new StringBuilder(); // Let compress be an index to the first IPv6 piece in // the first longest sequences of address’s IPv6 pieces that are 0. int compress = longestSequenceOf0Pieces(address); // Let ignore0 be false. boolean ignore0 = false; // For each pieceIndex in the range 0 to 7, inclusive: for (int pieceIndex = 0; pieceIndex <= 7; pieceIndex++) { // If ignore0 is true and address[pieceIndex] is 0, then continue. if (ignore0 && address[pieceIndex] == 0) { continue; } // Otherwise, if ignore0 is true, set ignore0 to false. else if (ignore0) { ignore0 = false; } // If compress is pieceIndex, then: if (compress == pieceIndex) { // Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise. String separator = (pieceIndex == 0) ? "::" : ":"; // Append separator to output. output.append(separator); // Set ignore0 to true and continue. ignore0 = true; continue; } // Append address[pieceIndex], represented as // the shortest possible lowercase hexadecimal number, to output. output.append(Integer.toHexString(address[pieceIndex])); // If pieceIndex is not 7, then append U+003A (:) to output. if (pieceIndex != 7) { output.append(':'); } } // Return output. return output.toString(); } private static int longestSequenceOf0Pieces(int[] pieces) { int longestStart = -1; int longestLength = -1; int start = -1; for (int i = 0; i < pieces.length + 1; i++) { if (i < pieces.length && pieces[i] == 0) { if (start < 0) { start = i; } } else if (start >= 0) { int length = i - start; if (length > longestLength) { longestStart = start; longestLength = length; } start = -1; } } // If there is no sequence of address’s IPv6 pieces // that are 0 that is longer than 1, then set compress to null. if (longestLength > 1) { return longestStart; } else { return -1; } } @Override public boolean equals(Object obj) { if (obj == this) { return true; } else if (obj instanceof Ipv6Address other) { return Arrays.equals(this.pieces, other.pieces); } else { return false; } } @Override public int hashCode() { return Arrays.hashCode(this.pieces); } @Override public String toString() { return this.string; } } sealed interface Port permits StringPort, IntPort { } static final class StringPort implements Port { private final String port; public StringPort(String port) { this.port = port; } public String value() { return this.port; } @Override public String toString() { return this.port; } } static final class IntPort implements Port { private final int port; public IntPort(int port) { this.port = port; } public int value() { return this.port; } @Override public String toString() { return Integer.toString(this.port); } } sealed interface Path permits PathSegment, PathSegments { void append(int codePoint); void append(String s); boolean isEmpty(); void shorten(String scheme); boolean isOpaque(); Path clone(); String name(); } static final class PathSegment implements Path { @Nullable private StringBuilder builder = null; @Nullable String segment; PathSegment(String segment) { this.segment = segment; } PathSegment(int codePoint) { append(codePoint); } public String segment() { String result = this.segment; if (result == null) { Assert.state(this.builder != null, "String nor StringBuilder available"); result = this.builder.toString(); this.segment = result; } return result; } @Override public void append(int codePoint) { this.segment = null; if (this.builder == null) { this.builder = new StringBuilder(2); } this.builder.appendCodePoint(codePoint); } @Override public void append(String s) { this.segment = null; if (this.builder == null) { this.builder = new StringBuilder(s); } else { this.builder.append(s); } } @Override public String name() { String name = segment(); if (name.startsWith("/")) { name = name.substring(1); } return name; } @Override public boolean isEmpty() { if (this.segment != null) { return this.segment.isEmpty(); } else { Assert.state(this.builder != null, "String nor StringBuilder available"); return this.builder.isEmpty(); } } @Override public void shorten(String scheme) { throw new IllegalStateException("Opaque path not expected"); } @Override public boolean isOpaque() { return true; } @SuppressWarnings("MethodDoesntCallSuperMethod") @Override public Path clone() { return new PathSegment(segment()); } @Override public boolean equals(Object o) { if (o == this) { return true; } else if (o instanceof PathSegment other) { return segment().equals(other.segment()); } else { return false; } } @Override public int hashCode() { return segment().hashCode(); } @Override public String toString() { return segment(); } } static final class PathSegments implements Path { private final List segments; public PathSegments() { this.segments = new ArrayList<>(); } public PathSegments(List segments) { this.segments = new ArrayList<>(segments); } @Override public void append(int codePoint) { this.segments.add(new PathSegment(codePoint)); } @Override public void append(String segment) { this.segments.add(new PathSegment(segment)); } public int size() { return this.segments.size(); } public String get(int i) { return this.segments.get(i).segment(); } @Override public boolean isEmpty() { return this.segments.isEmpty(); } @Override public void shorten(String scheme) { int size = size(); if ("file".equals(scheme) && size == 1 && isWindowsDriveLetter(get(0), true)) { return; } if (!isEmpty()) { this.segments.remove(size - 1); } } @Override public boolean isOpaque() { return false; } @SuppressWarnings("MethodDoesntCallSuperMethod") @Override public Path clone() { return new PathSegments(this.segments); } @Override public String name() { StringBuilder output = new StringBuilder(); for (PathSegment segment : this.segments) { output.append('/'); output.append(segment.name()); } return output.toString(); } @Override public boolean equals(Object o) { if (o == this) { return true; } else if (o instanceof PathSegments other) { return this.segments.equals(other.segments); } else { return false; } } @Override public int hashCode() { return this.segments.hashCode(); } @Override public String toString() { StringBuilder output = new StringBuilder(); for (PathSegment segment : this.segments) { output.append(segment); } return output.toString(); } } private sealed interface ParseIpv4NumberResult permits ParseIpv4NumberFailure, ParseIpv4NumberSuccess { } private record ParseIpv4NumberSuccess(int number, boolean validationError) implements ParseIpv4NumberResult { } private static final class ParseIpv4NumberFailure implements ParseIpv4NumberResult { public static final ParseIpv4NumberFailure INSTANCE = new ParseIpv4NumberFailure(); private ParseIpv4NumberFailure() { } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy