cat.inspiracio.url.Machine Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of url-parser Show documentation
A JavaBean for URLs of the HTTP protocol.
There is a newer version: 0.0.1
/*
Copyright 2015 Alexander Bunkenburg 

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cat.inspiracio.url;

import static cat.inspiracio.url.State.AUTHORITY;
import static cat.inspiracio.url.State.FILE;
import static cat.inspiracio.url.State.FILE_HOST;
import static cat.inspiracio.url.State.FILE_SLASH;
import static cat.inspiracio.url.State.FRAGMENT;
import static cat.inspiracio.url.State.HOST;
import static cat.inspiracio.url.State.HOSTNAME;
import static cat.inspiracio.url.State.NON_RELATIVE_PATH;
import static cat.inspiracio.url.State.NO_SCHEME;
import static cat.inspiracio.url.State.PATH;
import static cat.inspiracio.url.State.PATH_OR_AUTHORITY;
import static cat.inspiracio.url.State.PATH_START;
import static cat.inspiracio.url.State.PORT;
import static cat.inspiracio.url.State.QUERY;
import static cat.inspiracio.url.State.RELATIVE;
import static cat.inspiracio.url.State.RELATIVE_SLASH;
import static cat.inspiracio.url.State.SCHEME;
import static cat.inspiracio.url.State.SCHEME_START;
import static cat.inspiracio.url.State.SPECIAL_AUTHORITY_IGNORE_SLASHES;
import static cat.inspiracio.url.State.SPECIAL_AUTHORITY_SLASHES;
import static cat.inspiracio.url.State.SPECIAL_RELATIVE_OR_AUTHORITY;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Arrays;
import java.util.List;

import cat.inspiracio.lang.NotImplementedException;

/** Encapsulate all the state that is being changed by the finite state machine. */
class Machine{

    /** Maximum legal value for a port. A port is a two-byte integer. 2^16 -1 */
    private final static int MAX_PORT=65535; //(Short.MAX_VALUE+1)*2 -1;
    
    //input parameters, as passed to the constructor; init() fills in defaults

    /** The string to parse. init() passes it through clean(). */
    private String input;
    /** Base URL. init() replaces a null base by a new, empty URL. */
    private URL base;
    /** Name of the encoding. init() sets it to "UTF-8" when it is null. */
    private String encodingOverride;
    /** The URL that the machine fills in and that run() returns. init() creates one when it is null. */
    private URL url;
    /** If not null, the state in which the machine starts instead of the scheme start state. */
    private State stateOverride;
    
    //state of the machine

    /** Current state; selects the method that run() calls for the current character. */
    private State state;
    /** Collects characters of the part that is currently being parsed. */
    private StringBuilder buffer;
    //private boolean flagAt=false;
    /** The [] flag of the spec. init() unsets it. */
    private boolean flagSquare=false;
    /** Index of the current character in input; equal to input.length() at the fictitious EOF code point. */
    private int pointer=0;
    /** Set by nextChar(): true if pointer is at the fictitious EOF code point. */
    private boolean EOF=false;
    /** Set by nextChar(): the current character, or 0 at EOF. */
    private char c;
    
    /**
     * Stores the arguments as they are. Defaults for missing (null) arguments
     * and the working state are only set up by {@link #init()}.
     *
     * @param input the string to parse
     * @param base the base URL, or null
     * @param encodingOverride name of the encoding, or null
     * @param url the URL to fill in, or null
     * @param stateOverride the state to start in, or null
     */
    Machine(String input, URL base, String encodingOverride, URL url, State stateOverride){
        this.stateOverride = stateOverride;
        this.url = url;
        this.encodingOverride = encodingOverride;
        this.base = base;
        this.input = input;
    }
    
    /**
     * Prepares the machine for a run: supplies defaults for arguments that
     * were not given and resets the working state.
     */
    void init(){
        // No target URL given: parse into a fresh one.
        if(url==null){
            url=new URL();
        }

        // The spec strips leading and trailing C0 controls and space only when
        // no URL was given; here the input is passed through clean() in every case.
        input=clean(input);

        // Start in the overriding state if there is one, else at the scheme start.
        if(stateOverride!=null){
            state=stateOverride;
        }else{
            state=SCHEME_START;
        }

        // A missing base is replaced by an empty URL (all parts null),
        // it is not left null.
        if(base==null){
            base=new URL();
        }

        // Default encoding.
        if(encodingOverride==null){
            encodingOverride="UTF-8";
        }

        // Empty buffer, [] flag unset (the @ flag is not used),
        // pointer at the first code point.
        buffer=new StringBuilder();
        flagSquare=false;
        pointer=0;
    }
    
    /**
     * Drives the state machine over the input and returns the URL, unless
     * one of the state methods throws.
     *
     * The input is treated as having input.length()+1 positions: the extra,
     * last position is the fictitious EOF code point. After every step the
     * pointer advances by one; a state method that wants to see the same
     * character again decreases the pointer itself.
     */
    URL run(){
        for(; pointer<=input.length(); pointer++){
            nextChar();

            switch(state){
            case SCHEME_START:
                schemeStart();
                break;
            case SCHEME:
                scheme();
                break;
            case NO_SCHEME:
                noScheme();
                break;
            case SPECIAL_RELATIVE_OR_AUTHORITY:
                specialRelativeOrAuthority();
                break;
            case PATH_OR_AUTHORITY:
                pathOrAuthority();
                break;
            case RELATIVE:
                relative();
                break;
            case RELATIVE_SLASH:
                relativeSlash();
                break;
            case SPECIAL_AUTHORITY_SLASHES:
                specialAuthoritySlashes();
                break;
            case SPECIAL_AUTHORITY_IGNORE_SLASHES:
                specialAuthorityIgnoreSlashes();
                break;
            case AUTHORITY:
                authority();
                break;
            case HOST:
            case HOSTNAME:
                // Both states are handled by the same method.
                host();
                break;
            case PORT:
                port();
                break;
            case FILE:
                file();
                break;
            case FILE_SLASH:
                fileSlash();
                break;
            case FILE_HOST:
                fileHost();
                break;
            case PATH_START:
                pathStart();
                break;
            case PATH:
                path();
                break;
            case NON_RELATIVE_PATH:
                nonRelativePath();
                break;
            case QUERY:
                query();
                break;
            case FRAGMENT:
                fragment();
                break;
            default:
                // A state without a handler: a programming error.
                throw new NotImplementedException(state);
            }
        }

        return url;
    }
    
    /**
     * Loads the character under the pointer into c and records in EOF
     * whether the pointer stands on the fictitious EOF code point, that is,
     * just behind the last character of the input. At EOF, c is 0.
     */
    private void nextChar(){
        EOF = (pointer==input.length());
        c = EOF ? (char)0 : input.charAt(pointer);
    }

    /**
     * Scheme start state.
     * A leading ASCII letter starts a scheme: it is lowercased, buffered, and
     * the machine moves on to the scheme state. Anything else means there is
     * no scheme: without state override the same character is looked at again
     * in the no scheme state; with state override it is a syntax violation.
     *
     * @throws SyntaxViolation if a state override is given and c is not an ASCII letter
     */
    private void schemeStart(){
        if(isASCIIAlpha(c)){
            c=Character.toLowerCase(c);
            buffer.append(c);
            state=SCHEME;
            return;
        }

        // Throwing terminates the algorithm.
        if(stateOverride!=null)
            throw new SyntaxViolation();

        // Re-read this character in the no scheme state.
        state=NO_SCHEME;
        pointer--;
    }
    
    /**
     * Scheme state.
     * Collects the lowercased scheme characters in buffer. At ":" the buffer
     * becomes the URL's scheme and the next state is chosen from the scheme,
     * the base and the remaining input. Any other character means the input
     * has no scheme after all: without state override, parsing starts over
     * from the first character in the no scheme state.
     *
     * @throws SyntaxViolation if a state override is given and c is neither
     *      a scheme character nor ":"
     */
    private void scheme(){
        
        //1. If c is an ASCII alphanumeric, "+", "-", or ".", append c, lowercased, to buffer.
        if(isASCIIAlphaNumeric(c) || c=='+' || c=='-' || c=='.'){
            c=Character.toLowerCase(c);
            buffer.append(c);
        }
        
        //2. Otherwise, if c is ":", run these substeps:
        else if(c==':'){
            //1. If state override is given, run these subsubsteps:
            if(stateOverride!=null){
                //1. If url's scheme is a special scheme and buffer is not, terminate this algorithm.
                //2. If url's scheme is not a special scheme and buffer is, terminate this algorithm.
                //Both cases together: the two "is special" answers differ.
                if(url.isSpecial() != isSpecialScheme(buffer))
                    terminate();
            }

            //2. Set url's scheme to buffer.
            url.scheme(buffer.toString());
            
            //3. Set buffer to the empty string.
            empty(buffer);
         
            //4. If state override is given, terminate this algorithm.
            if(stateOverride!=null)
                terminate();
            
            //5. If url's scheme is "file", run these subsubsteps:
            if("file".equals(url.scheme())){
                //1. If remaining does not start with "//", syntax violation.
                if(!remaining().startsWith("//"))
                    syntaxViolation();
                
                //2. Set state to file state.
                state=FILE;
            }
            
            //6. Otherwise, if url is special, base is non-null, and base's scheme is 
            //equal to url's scheme, set state to special relative or authority state.
            else if(url.isSpecial() && base!=null && equals(base.scheme(), url.scheme()))
                state=SPECIAL_RELATIVE_OR_AUTHORITY;
            
            //7. Otherwise, if url is special, set state to special authority slashes state.
            else if(url.isSpecial())
                state=SPECIAL_AUTHORITY_SLASHES;
            
            //8. Otherwise, if remaining starts with an "/", set state to path or 
            //authority state, and increase pointer by one.
            else if(remaining().startsWith("/")){
                state=PATH_OR_AUTHORITY;
                pointer++;
            }
            
            //9. Otherwise, set url's non-relative flag, append an empty string to 
            //url's path, and set state to non-relative path state.
            else{
                url.setNonRelative(true);
                appendPath(url, "");//append an empty string to url's path
                state=NON_RELATIVE_PATH;
            }
        }
        
        //3. Otherwise, if state override is not given, set buffer to the empty string, 
        //state to no scheme state, and start over (from the first code point in input).
        else if(stateOverride==null){
            empty(buffer);
            state=NO_SCHEME;
            //Start over, from the first code point in input:
            //run() increments the pointer after this method returns, which gives pointer==0.
            pointer=-1;
        }
        
        //4. Otherwise, syntax violation, terminate this algorithm.
        else 
            throw new SyntaxViolation();//terminate
    }

    /**
     * No scheme state: the input has no scheme of its own, so it is
     * interpreted relative to the base.
     */
    private void noScheme(){
        // Nothing can be resolved against a missing base, and against a
        // non-relative base only a bare fragment can: syntax violation, failure.
        if(base==null || (base.isNonRelative() && c!='#')){
            syntaxViolation();
            fail();
            return;
        }

        // "#..." against a non-relative base: take over scheme, path and
        // query of the base, start with an empty fragment.
        if(base.isNonRelative() && c=='#'){
            url.scheme(base.scheme());
            url.path(base.path());
            url.parameters(clone(base.parameters()));
            url.fragment("");
            url.setNonRelative(true);
            state=FRAGMENT;
            return;
        }

        // Look at the same character again, in the file state if the base is
        // a file URL, in the relative state otherwise.
        state = "file".equals(base.scheme()) ? FILE : RELATIVE;
        pointer--;
    }
    
    /**
     * Special relative or authority state.
     * "//" leads to the special authority ignore slashes state, skipping the
     * second slash. Everything else is a syntax violation and is looked at
     * again in the relative state.
     */
    private void specialRelativeOrAuthority(){
        boolean doubleSlash = c=='/' && remaining().startsWith("/");

        if(!doubleSlash){
            syntaxViolation();//but don't terminate algorithm
            state=RELATIVE;
            pointer--;
            return;
        }

        state=SPECIAL_AUTHORITY_IGNORE_SLASHES;
        pointer++;
    }
    
    /**
     * Path or authority state.
     * A "/" here means an authority follows; any other character belongs to
     * the path and is looked at again in the path state.
     */
    private void pathOrAuthority(){
        if(c!='/'){
            state=PATH;
            pointer--;
            return;
        }
        state=AUTHORITY;
    }
    
    /**
     * Relative state.
     * The URL takes the scheme of the base; what else it takes over from the
     * base depends on the current character.
     */
    private void relative(){
        url.scheme(base.scheme());

        // Empty relative reference: everything up to and including the query
        // comes from the base.
        if(EOF){
            inheritAuthorityAndPathFromBase();
            url.parameters(clone(base.parameters()));
            return;
        }

        // One slash seen: the relative slash state decides what it means.
        if(c=='/'){
            state=RELATIVE_SLASH;
            return;
        }

        // "?...": authority, path and parameters of the base are copied,
        // then the query state reads the new query.
        if(c=='?'){
            inheritAuthorityAndPathFromBase();
            url.parameters(clone(base.parameters()));
            state=QUERY;
            return;
        }

        // "#...": authority, path, parameters and fragment of the base are
        // copied, then the fragment state reads the new fragment.
        if(c=='#'){
            inheritAuthorityAndPathFromBase();
            url.setParameters(clone(base.getParameters()));
            url.fragment(base.fragment());
            state=FRAGMENT;
            return;
        }

        // In a special URL a backslash is treated like a slash, but is a
        // syntax violation.
        if(url.isSpecial() && c=='\\'){
            syntaxViolation();
            state=RELATIVE_SLASH;
            return;
        }

        // A relative path: authority and path come from the base, the last
        // entry of the path is removed, and the current character is looked
        // at again in the path state.
        inheritAuthorityAndPathFromBase();
        url.pop();
        state=PATH;
        pointer--;
    }

    /** Copies username, password, host, port and path from the base to the URL. */
    private void inheritAuthorityAndPathFromBase(){
        url.username(base.username());
        url.password(base.password());
        url.server(base.server());//host
        url.port(base.port());
        url.path(base.path());
    }
    
    /**
     * Relative slash state: one "/" has been seen.
     * A second slash (or, in a special URL, a backslash) announces a host.
     * Anything else means the first slash started an absolute path on the
     * host of the base.
     */
    private void relativeSlash(){
        boolean secondSlash = c=='/' || (url.isSpecial() && c=='\\');

        if(!secondSlash){
            // Absolute path: the authority comes from the base.
            url.username(base.username());
            url.password(base.password());
            url.server(base.server());//host
            url.port(base.port());
            // Not in the spec: the path starts with the one "/" already seen.
            url.path("/");
            state=PATH;
            pointer--;
            return;
        }

        // Seen "//": next comes the host. A backslash is accepted but is a
        // syntax violation.
        if(c=='\\')
            syntaxViolation();
        state=SPECIAL_AUTHORITY_IGNORE_SLASHES;
    }
    
    /**
     * Special authority slashes state.
     * Expects "//" and skips the second slash. Anything else is a syntax
     * violation and is looked at again. Either way the next state is the
     * special authority ignore slashes state.
     */
    private void specialAuthoritySlashes(){
        boolean doubleSlash = c=='/' && remaining().startsWith("/");

        if(!doubleSlash){
            syntaxViolation();
            state=SPECIAL_AUTHORITY_IGNORE_SLASHES;
            pointer--;
            return;
        }

        state=SPECIAL_AUTHORITY_IGNORE_SLASHES;
        pointer++;
    }
    
    /**
     * Special authority ignore slashes state.
     * Surplus slashes and backslashes are syntax violations and are skipped.
     * The first other character is looked at again in the authority state.
     */
    private void specialAuthorityIgnoreSlashes(){
        if(c=='/' || c=='\\'){
            syntaxViolation();
            return;
        }
        state=AUTHORITY;
        pointer--;
    }
    
    /*
     * Authority state.
     *
     * NOTE(review): this method is damaged in this copy of the file. The text
     * breaks off inside the commented-out for-loop below; it looks as if
     * everything between a less-than sign and a later greater-than sign was
     * stripped. The code after the break does not read like part of the
     * authority state: it deals with a base whose scheme is "file" and with
     * Windows drive letters, so it is presumably the tail of another state
     * method (TODO confirm against the original source).
     * As it stands, the block uses "remaining" as a variable although the rest
     * of the class calls remaining(), uses "cs" which is only assigned inside
     * the comment, and has a "break" outside of any switch or loop.
     * Restore this region from the original source before changing it.
     */
    private void authority(){
        
        //If c is "@", run these substeps:
        if(c=='@'){
            
            //I don't understand these steps.
            //1. Syntax violation.
            //syntaxViolation();
            //2. If the @ flag is set, prepend "%40" to buffer.
            //if(flagAt)buffer.insert(0, "%40");// %40 = '@'
            //3. Set the @ flag.
            //flagAt=true;
            
            //4. For each codePoint in buffer, run these substeps:
            //They skip EOL chars, separate user and password, utf8percentEncode, and set user and password.
            //I don't understand why the encoding: input is already encoded and output (this.username and 
            //this.password) should be decoded. So I make my own code.
//            for(int i=0; i cs=Arrays.asList('/', '\\', '?', '#');
            if(base!=null && "file".equals(base.scheme()) && (
                    //c and the first code point of remaining are not a Windows drive letter
                    !isWindowsDriveLetter(c, remaining.charAt(0)) ||
                    //remaining consists of one code point
                    1==remaining.length() ||
                    //remaining’s second code point is not one of "/", "\", "?", and "#"
                    !cs.contains(remaining.charAt(1))
                    )){
                //then set url’s host to base’s host, url’s path to base’s path, and then pop url’s path.
                url.server(base.server());//Host
                url.path(base.path());
                url.pop();
            }
            
            //2. Otherwise, if base is non-null and base’s scheme is "file", syntax violation.
            else if(base!=null && "file".equals(base.scheme()))
                syntaxViolation();
            
            //3. Set state to path state, and decrease pointer by one.
            state=PATH;
            pointer--;
            break;
        }
    }
    
    /**
     * File slash state.
     * A second slash (or backslash, which is a syntax violation) announces
     * the host of a file URL. Anything else starts the path; if the base is a
     * file URL whose path begins with a normalized Windows drive letter, that
     * drive letter is taken over first.
     */
    private void fileSlash(){
        boolean slash = c=='/' || c=='\\';

        if(slash){
            if(c=='\\')
                syntaxViolation();
            state=FILE_HOST;
            return;
        }

        if(base!=null && "file".equals(base.scheme()) && isNormalizedWindowsDriveLetter(first(base.getPath()))){
            // The original author marked this step as unsure ("correct?").
            appendPath(url, first(base.path()));
        }

        // Look at the same character again in the path state.
        state=PATH;
        pointer--;
    }
    
    /**
     * File host state.
     * Collects the host of a file URL in buffer until EOF or one of
     * "/", "\", "?", "#" ends it, and then decides what the buffer was.
     */
    private void fileHost(){
        boolean endOfHost = EOF || c=='/' || c=='\\' || c=='?' || c=='#';

        if(!endOfHost){
            // Tab, line feed and carriage return are syntax violations and
            // are not collected; every other character is.
            if(c=='\t' || c=='\n' || c=='\r')
                syntaxViolation();
            else
                buffer.append(c);
            return;
        }

        // The terminating character is looked at again by the next state.
        pointer--;

        // "C:" and the like is not a host but the start of the path. The
        // buffer is kept and the path state goes on with it.
        if(isWindowsDriveLetter(buffer)){
            syntaxViolation();
            state=PATH;
            return;
        }

        if(!isEmpty(buffer)){
            String host=hostParsing(buffer);
            // "localhost" is not stored as the host.
            if(!"localhost".equals(host))
                url.server(host);
            empty(buffer);
        }
        state=PATH_START;
    }
    
    /**
     * Path start state.
     * Flags a backslash in a special URL as a syntax violation and hands
     * over to the path state.
     *
     * Deviation from the spec, made on purpose by the original author: the
     * spec decreases the pointer only if c is neither "/" nor, in a special
     * URL, "\". Here the pointer is always decreased, so the path state
     * always sees the current character again.
     */
    private void pathStart(){
        boolean backslashInSpecial = url.isSpecial() && c=='\\';
        if(backslashInSpecial)
            syntaxViolation();

        pointer--;
        state=PATH;
    }
    
    /**
     * Path state.
     * Collects one path segment in buffer. When the segment ends (at EOF, at
     * "/", at "\" in a special URL, or at "?" or "#" if no state override is
     * given), the segment is interpreted: ".." removes the last entry of the
     * URL's path, "." is dropped, anything else is appended to the URL's path.
     * Unlike the spec, the URL's path is one string here, so a consumed "/"
     * is appended to it explicitly.
     */
    private void path(){
        //1. If c is EOF code point or "/", or url is special and c is "\", 
        //or state override is not given and c is "?" or "#", run these substeps:
        if(EOF || c=='/' || (url.isSpecial() && c=='\\') || (stateOverride==null && (c=='?' || c=='#'))){
            
            //1. If url is special and c is "\", syntax violation.
            if(url.isSpecial() && c=='\\')
                syntaxViolation();
            
            //2. If buffer is a double-dot path segment, pop url's path, and 
            //then if neither c is "/", nor url is special and c is "\", 
            //append the empty string to url's path.
            if(isDoubleDot(buffer)){
                url.pop();
                if(c!='/' && !(url.isSpecial() && c=='\\'))
                    appendPath(url, "");
            }
            
            //3. Otherwise, if buffer is a single-dot path segment and if 
            //neither c is "/", nor url is special and c is "\", append 
            //the empty string to url's path.
            else if(isSingleDot(buffer) && c!='/' && !(url.isSpecial() && c=='\\'))
                appendPath(url, "");
            
            //4. Otherwise, if buffer is not a single-dot path segment, run these subsubsteps:
            else if(!isSingleDot(buffer)){
                
                //1. If url's scheme is "file", url's path is empty, and 
                //buffer is a Windows drive letter, run these subsubsubsteps:
                //(An empty path is represented by null here.)
                if("file".equals(url.scheme()) && url.path()==null && isWindowsDriveLetter(buffer)){
                
                    //1. If url's host is non-null, syntax violation.
                    if(url.server()!=null)
                        syntaxViolation();
                    
                    //2. Set url's host to null and replace the second 
                    //code point in buffer with ":".
                    url.server(null);//host
                    buffer.setCharAt(1, ':');
                }
                
                //2. Append buffer to url's path.
                if(!isEmpty(buffer))
                    appendPath(url, buffer);
                if(c=='/')
                    appendPath(url, "/");//Alex's line. This is the / that we have consumed.
            }
            
            //5. Set buffer to the empty string.
            empty(buffer);
            
            //6. If c is "?", set url's query to the empty string, and 
            //state to query state.
            //(Here the parameters are reset to null; the query state creates them.)
            if(c=='?'){
                url.setParameters(null);
                state=QUERY;
            }
        
            //7. If c is "#", set url's fragment to the empty string, 
            //and state to fragment state.
            else if(c=='#'){
                url.fragment("");
                state=FRAGMENT;
            }
        }
        
        //2. Otherwise, if c is U+0009, U+000A, or U+000D, syntax violation.
        else if(c==9 || c==10 || c==13)
            syntaxViolation();
        
        //3. Otherwise, run these steps:
        else{
            //1. If c is not a URL code point and not "%", syntax violation.
            if(!isURLCodePoint(c) && c!='%')
                syntaxViolation();
            
            //2. If c is "%" and remaining does not start with two ASCII 
            //hex digits, syntax violation.
            if(c=='%' && !remainingStartsWithTwoHex())
                syntaxViolation();
            
            //3. utf-8 percent encode c using the default encode set, and 
            //append the result to buffer.
            buffer.append(encode(c));
        }
    }
    
    /**
     * Non-relative path state.
     * "?" and "#" end the path and lead to the query and fragment states.
     * Every other character, except EOF, tab, line feed and carriage return,
     * is encoded and appended to the URL's path.
     */
    private void nonRelativePath(){
        //1. If c is "?", set url's query to the empty string and state to query state.
        if(c=='?'){
            //The parameters are not initialised here; the query state creates them.
            state=QUERY;
        }
        
        //2. Otherwise, if c is "#", set url's fragment to the empty string 
        //and state to fragment state.
        else if(c=='#'){
            url.fragment("");
            state=FRAGMENT;
        }
        
        //3. Otherwise, run these substeps:
        else{
            //1. If c is not the EOF code point, not a URL code point, and 
            //not "%", syntax violation.
            if(!EOF && !isURLCodePoint(c) && c!='%')
                syntaxViolation();
        
            //2. If c is "%" and remaining does not start with two ASCII 
            //hex digits, syntax violation.
            if(c=='%' && !remainingStartsWithTwoHex())
                syntaxViolation();
            
            //3. If c is none of EOF code point, U+0009, U+000A, and U+000D, 
            //utf-8 percent encode c using the simple encode set, and append 
            //the result to url's path.
            if(!EOF && c!=9 && c!=10 && c!=13){
                String s=encode(c);
                //appendPath starts from "" when the path is still null. Plain
                //concatenation would put the text "null" into the path.
                appendPath(url, s);
            }
        }
    }
    
    /**
     * Query state.
     * Collects the query string, still encoded, in buffer. At EOF, or at "#"
     * if no state override is given, the collected string is turned into
     * URLParameters and set on the URL.
     */
    private void query(){
        
        //1. If c is the EOF code point, or state override is not given and 
        //c is "#", run these substeps:
        if(EOF || (stateOverride==null && c=='#')){
        
            //1. If url is not special or url's scheme is either "ws" or 
            //"wss", set encoding override to utf-8.
            if(!url.isSpecial() || "ws".equals(url.scheme()) || "wss".equals(url.scheme()))
                encodingOverride="UTF-8";
            
            //Simplified: instead of steps 2 and 3 of the spec, the collected 
            //query string is handed to URLParameters.
            String s=buffer.toString();
            URLParameters ps=new URLParameters(s);
            url.parameters(ps);
            //The steps of the spec that are not implemented here:
            //2. Set buffer to the result of encoding buffer using encoding override.
            //3. For each byte in buffer run these subsubsteps:
                //1. If byte is less than 0x21, greater than 0x7E, or is one of 
                //0x22, 0x23, 0x3C, and 0x3E, append byte, percent encoded, to url's query.
                //That does not encode = and &. It encodes control characters, 
                //the double quote, #, and the angle brackets. So it is extra cleaning.
                //2. Otherwise, append a code point whose value is byte to url's query.
            
            //4. Set buffer to the empty string.
            empty(buffer);
            
            //5. If c is "#", set url's fragment to the empty string, and state to fragment state.
            if(c=='#'){
                url.fragment("");
                state=FRAGMENT;
            }
        }
        
        //2. Otherwise, if c is U+0009, U+000A, or U+000D, syntax violation.
        else if(c==9 || c==10 || c==13)
            syntaxViolation();
        
        //3. Otherwise, run these substeps:
        else{
            //1. If c is not a URL code point and not "%", syntax violation.
            if(!isURLCodePoint(c) && c!='%')
                syntaxViolation();
            
            //2. If c is "%" and remaining does not start with two ASCII hex digits, syntax violation.
            if(c=='%' && !remainingStartsWithTwoHex())
                syntaxViolation();
            
            //3. Append c to buffer.
            buffer.append(c);//Collects the query string, encoded
        }
    }
    
    /**
     * Fragment state.
     * Collects the fragment, still encoded, in buffer. At EOF the collected
     * string is decoded once and set as the URL's fragment.
     */
    private void fragment(){
        // End of input: decode what has been collected.
        if(EOF){
            String collected=buffer.toString();
            url.fragment(decode(collected));
            return;
        }

        // U+0000, tab, line feed and carriage return are syntax violations
        // and are not collected.
        if(c==0 || c=='\t' || c=='\n' || c=='\r'){
            syntaxViolation();
            return;
        }

        // Neither a URL code point nor "%": syntax violation.
        if(!isURLCodePoint(c) && c!='%')
            syntaxViolation();

        // "%" that is not followed by two ASCII hex digits: syntax violation.
        if(c=='%' && !remainingStartsWithTwoHex())
            syntaxViolation();

        buffer.append(c);
    }
    
    // helpers ----------------------------------------------
    
    /**
     * Appends a string to the path of a URL. A path that is still null
     * counts as the empty string.
     *
     * @param u the URL whose path is extended
     * @param s the string to append
     */
    private void appendPath(URL u, String s){
        String current=u.path();
        u.path((current==null ? "" : current) + s);
    }
    
    /**
     * Appends the contents of a string builder to the path of a URL,
     * initialising the path if necessary.
     *
     * @param u the URL whose path is extended
     * @param builder the builder whose current contents are appended
     */
    private void appendPath(URL u, StringBuilder builder){
        //Read the argument, not the field buffer: otherwise the parameter is ignored.
        appendPath(u, builder.toString());
    }
    
    /** Placeholder for removing leading and trailing C0 controls and space.
     * The removal is not implemented: the input comes back unchanged.
     * @param input The raw input.
     * @return The very same string. */
    private String clean(String input){
        return input;
    }
    
    /** Null-safe copy of URL parameters.
     * @param ps The parameters to copy, may be null.
     * @return null if ps is null or empty, otherwise ps.clone(). */
    private URLParameters clone(URLParameters ps){
        boolean nothingToCopy= ps==null || ps.isEmpty();
        return nothingToCopy ? null : ps.clone();
    }
    
    /** Removes all characters from a string builder.
     * @param builder The builder to clear.
     * @return The same builder, now empty. */
    private StringBuilder empty(StringBuilder builder){
        builder.setLength(0);
        return builder;
    }

    /** Null-safe equality.
     * @return If a is null, whether b is null too; otherwise a.equals(b). */
    private boolean equals(Object a, Object b){
        return a==null ? b==null : a.equals(b);
    }
    
    /** Is c an ASCII letter, A to Z or a to z? */
    private boolean isASCIIAlpha(char c){
        boolean upper= c>='A' && c<='Z';
        boolean lower= c>='a' && c<='z';
        return upper || lower;
    }
    /** Is c an ASCII digit or an ASCII letter? */
    private boolean isASCIIAlphaNumeric(char c){
        if(isASCIIDigit(c))
            return true;
        return isASCIIAlpha(c);
    }
    /** Is c an ASCII digit, 0 to 9? */
    private boolean isASCIIDigit(char c){
        return c>='0' && c<='9';
    }

    /** Is c an ASCII hex digit? 
     * These are the ASCII digits, the letters A to F (U+0041 to U+0046) 
     * and the letters a to f (U+0061 to U+0066). */
    private boolean isASCIIHexDigit(char c){
        if(c>='0' && c<='9')
            return true;
        if(c>='A' && c<='F')
            return true;
        return c>='a' && c<='f';
    }

    /** Is the buffer a double-dot path segment? 
     * That is ".." or a case-insensitive match for one of ".%2e", "%2e." 
     * and "%2e%2e". 
     * @param buffer The segment, may be null.
     * @return false for null. */
    private boolean isDoubleDot(StringBuilder buffer){
        if(buffer==null)
            return false;
        String segment=buffer.toString();
        return "..".equals(segment)
            || ".%2e".equalsIgnoreCase(segment)
            || "%2e.".equalsIgnoreCase(segment)
            || "%2e%2e".equalsIgnoreCase(segment);
    }
    
    /** Is the buffer a single-dot path segment? 
     * That is "." or a case-insensitive match for "%2e". 
     * @param buffer The segment, may be null.
     * @return false for null. */
    private boolean isSingleDot(StringBuilder buffer){
        if(buffer==null)
            return false;
        String segment=buffer.toString();
        return ".".equals(segment) || "%2e".equalsIgnoreCase(segment);
    }
    
    /** Does the builder hold a special scheme? 
     * Sets the builder's text as scheme of a fresh URL and asks that URL 
     * whether it is special. */
    private boolean isSpecialScheme(StringBuilder builder){
        String scheme=builder.toString();
        return new URL().scheme(scheme).isSpecial();
    }
    
    /** Does the builder contain no characters? */
    private boolean isEmpty(StringBuilder builder){
        return builder.length()==0;
    }
    
    /** Is s a normalized Windows drive letter? 
     * That is a Windows drive letter whose second code point is ":". */
    private boolean isNormalizedWindowsDriveLetter(String s){
        if(!isWindowsDriveLetter(s))
            return false;
        return ':'==s.charAt(1);
    }
    
    /** Is c a URL code point? 
     * 
     * The URL code points are ASCII alphanumeric, "!", "$", "&", "'", "(", ")", 
     * "*", "+", ",", "-", ".", "/", ":", ";", "=", "?", "@", "_", "~", and code 
     * points in the ranges U+00A0 to U+D7FF, U+E000 to U+FDCF, U+FDF0 to U+FFFD, 
     * and, in each of the planes from U+10000 up to U+10FFFD, everything except 
     * the plane's last two code points. 
     * 
     * Only the part that fits in a single char is checked here: the argument 
     * cannot carry a code point above U+FFFF, and the two halves of a 
     * surrogate pair (U+D800 to U+DFFF) each give false. */
    private boolean isURLCodePoint(char c){
        if(isASCIIAlphaNumeric(c))
            return true;
        
        //The punctuation that is allowed unencoded.
        if("!$&'()*+,-./:;=?@_~".indexOf(c)!=-1)
            return true;
        
        //The non-ASCII ranges within the basic multilingual plane.
        if(0x00A0<=c && c<=0xD7FF)
            return true;
        if(0xE000<=c && c<=0xFDCF)
            return true;
        return 0xFDF0<=c && c<=0xFFFD;
        //XXX Code points above U+FFFF are not representable in a char.
    }

    /** Do a and b together form a Windows drive letter? 
     * That is an ASCII alpha followed by either ":" or "|". 
     * @param a The first code point.
     * @param b The second code point. */
    private boolean isWindowsDriveLetter(char a, char b){
        if(!isASCIIAlpha(a))
            return false;
        return b==':' || b=='|';
    }
    
    /** Is the string a Windows drive letter? 
     * That is exactly two code points, an ASCII alpha followed by either 
     * ":" or "|". 
     * @param s May be null, which gives false. */
    private boolean isWindowsDriveLetter(String s){
        boolean twoLong= s!=null && s.length()==2;
        return twoLong && isWindowsDriveLetter(s.charAt(0), s.charAt(1));
    }

    /** Does the builder hold a Windows drive letter? 
     * That is exactly two code points, an ASCII alpha followed by either 
     * ":" or "|". 
     * @param s May be null, which gives false. */
    private boolean isWindowsDriveLetter(StringBuilder s){
        if(s!=null && s.length()==2)
            return isWindowsDriveLetter(s.charAt(0), s.charAt(1));
        return false;
    }

    /** The part of a path before its first "/". 
     * @param path May be null.
     * @return The text in front of the first slash. The empty string if path 
     *  is null, contains no slash, or starts with a slash. */
    private String first(String path){
        int slash= path==null ? -1 : path.indexOf('/');
        return slash<0 ? "" : path.substring(0, slash);
        //XXX Does path start with '/' ? Yes.
    }
    
    /** Stand-in for the host parser of 
     * https://url.spec.whatwg.org/#concept-host-parser 
     * Dummy implementation: the argument is returned unchanged. */
    private String hostParsing(String s){
        return s;
    }
    /** Host parsing applied to the current contents of a string builder. */
    private String hostParsing(StringBuilder buffer){
        String host=buffer.toString();
        return hostParsing(host);
    }

    /** Reads the contents of a string builder as a decimal integer. 
     * @param builder Holds the digits.
     * @return The parsed value.
     * @throws NumberFormatException The contents are not a parsable integer. */
    private int parseInt(StringBuilder builder){
        String digits=builder.toString();
        return Integer.parseInt(digits);
    }
    
    /** The remaining input after pointer. */
    private String remaining(){
        if(input.length()", "?", "`", "{", and "}".
        //The user info encode set is the default encode set and code points "/", ":", ";", "=", "@", "[", "\", "]", "^", and "|".
        //approximate implementation
        try{
            return URLEncoder.encode("" + c, "UTF-8");//UnsupportedEncodingException
        }catch(UnsupportedEncodingException e){throw new RuntimeException(e);}
    }

    /** Percent-decodes a string and reads the result as UTF-8. 
     * 
     * Every "%" that is followed by two ASCII hex digits is replaced by the 
     * byte these digits denote. Everything else is kept as it is: in 
     * particular "+" stays "+" (it is not a space outside form encoding), 
     * and a "%" that is not followed by two hex digits stays in the output 
     * instead of aborting the parse. Byte sequences that are not valid UTF-8 
     * decode to U+FFFD.
     * 
     * Spec: https://url.spec.whatwg.org/#percent-decode 
     * @param s The encoded string, not null.
     * @return The decoded string. */
    private String decode(String s){
        //Nothing encoded: no need for the round trip through bytes.
        if(s.indexOf('%')==-1)
            return s;
        
        //Percent-decoding is defined on bytes, so that an encoded multi-byte 
        //character is reassembled before it is read as UTF-8.
        byte[] encoded=s.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        java.io.ByteArrayOutputStream decoded=new java.io.ByteArrayOutputStream(encoded.length);
        for(int i=0; i<encoded.length; i++){
            int b=encoded[i];
            if(b=='%' && i+2<encoded.length){
                //The mask keeps the argument within U+0000 to U+00FF, where 
                //only 0-9, A-F and a-f count as hex digits.
                int high=Character.digit(encoded[i+1] & 0xFF, 16);
                int low=Character.digit(encoded[i+2] & 0xFF, 16);
                if(0<=high && 0<=low){
                    decoded.write(16*high + low);
                    i+=2;//Skip the two digits just consumed.
                    continue;
                }
            }
            //An ordinary byte, or a "%" that does not start a valid escape.
            decoded.write(b);
        }
        return new String(decoded.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    }

    /** Parsing fails. */
    private void fail(){throw new Failure();}
}