// com.github.jsonldjava.utils.JsonLdUrl - Maven / Gradle / Ivy
//
// Go to download
package com.github.jsonldjava.utils;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A parsed URL, with helpers for removing dot segments, relativizing an IRI
 * against a base and resolving a reference against a base URI.
 *
 * <p>
 * Instances are produced by {@link #parse(String)}. All fields are public and
 * mutable; the class performs no synchronization.
 */
public class JsonLdUrl {

    /** The string that was handed to {@link #parse(String)}. */
    public String href = "";
    /** Scheme followed by {@code ":"} (e.g. {@code "http:"}), or {@code ""}. */
    public String protocol = "";
    /**
     * Everything between {@code "//"} and the path as captured by the regex
     * (this includes userinfo and port when present), or {@code ""}.
     */
    public String host = "";
    /** Userinfo ({@code user[:password]}), or {@code ""}. */
    public String auth = "";
    public String user = "";
    public String password = "";
    /** Host name without userinfo and port, or {@code ""}. */
    public String hostname = "";
    public String port = "";
    /** Path, query and fragment exactly as they appear in the input. */
    public String relative = "";
    /** Path followed by {@code "?" + query} when a query is present. */
    public String path = "";
    public String directory = "";
    public String file = "";
    /** Query without the leading {@code "?"}, or {@code ""}. */
    public String query = "";
    /** Fragment including the leading {@code "#"}, or {@code ""}. */
    public String hash = "";

    // Not populated by the regex; these stay null when the input does not
    // match the pattern at all.
    /** Path without query. */
    public String pathname = null;
    /** {@link #pathname} with dot segments removed. */
    public String normalizedPath = null;
    /** Authority component ({@code [userinfo@]host[:port]}). */
    public String authority = null;

    private static final Pattern PARSER = Pattern.compile(
            "^(?:([^:\\/?#]+):)?(?:\\/\\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\\/?#]*)(?::(\\d*))?))?((((?:[^?#\\/]*\\/)*)([^?#]*))(?:\\?([^#]*))?(?:#(.*))?)");

    /**
     * Parses a URL string into its components.
     *
     * @param url
     *            the URL to parse; must not be null.
     * @return a new JsonLdUrl. If the input does not match the URL pattern
     *         only {@link #href} is populated.
     */
    public static JsonLdUrl parse(String url) {
        final JsonLdUrl rval = new JsonLdUrl();
        rval.href = url;

        final Matcher matcher = PARSER.matcher(url);
        if (!matcher.matches()) {
            return rval;
        }

        rval.protocol = groupOrEmpty(matcher, 1);
        rval.host = groupOrEmpty(matcher, 2);
        rval.auth = groupOrEmpty(matcher, 3);
        rval.user = groupOrEmpty(matcher, 4);
        rval.password = groupOrEmpty(matcher, 5);
        rval.hostname = groupOrEmpty(matcher, 6);
        rval.port = groupOrEmpty(matcher, 7);
        rval.relative = groupOrEmpty(matcher, 8);
        rval.path = groupOrEmpty(matcher, 9);
        rval.directory = groupOrEmpty(matcher, 10);
        rval.file = groupOrEmpty(matcher, 11);
        rval.query = groupOrEmpty(matcher, 12);
        rval.hash = groupOrEmpty(matcher, 13);

        // normalize to node.js API
        if (!"".equals(rval.host) && "".equals(rval.path)) {
            rval.path = "/";
        }
        // pathname must be captured before the query is appended to path
        rval.pathname = rval.path;
        parseAuthority(rval);
        rval.normalizedPath = removeDotSegments(rval.pathname, !"".equals(rval.authority));
        if (!"".equals(rval.query)) {
            rval.path += "?" + rval.query;
        }
        if (!"".equals(rval.protocol)) {
            rval.protocol += ":";
        }
        if (!"".equals(rval.hash)) {
            rval.hash = "#" + rval.hash;
        }
        return rval;
    }

    /** Returns the given capture group, or "" when it did not participate. */
    private static String groupOrEmpty(Matcher matcher, int group) {
        final String value = matcher.group(group);
        return value != null ? value : "";
    }

    /**
     * Splits a path on "/" the way JavaScript's split does for a trailing
     * delimiter: Java drops trailing empty strings, so one is added back when
     * the path ends with "/".
     */
    private static List<String> splitSegments(String path) {
        final List<String> segments = new ArrayList<String>(Arrays.asList(path.split("/")));
        if (path.endsWith("/")) {
            segments.add("");
        }
        return segments;
    }

    /** Appends the segments to target, separated by "/". */
    private static void appendJoined(StringBuilder target, List<String> segments) {
        for (int i = 0; i < segments.size(); i++) {
            if (i > 0) {
                target.append('/');
            }
            target.append(segments.get(i));
        }
    }

    /**
     * Removes dot segments from a JsonLdUrl path.
     *
     * @param path
     *            the path to remove dot segments from.
     * @param hasAuthority
     *            true if the JsonLdUrl has an authority, false if not. With
     *            an authority a leading ".." is dropped; without one it is
     *            kept.
     * @return The path without the dot segments
     */
    public static String removeDotSegments(String path, boolean hasAuthority) {
        // RFC 3986 5.2.4 (reworked)
        final List<String> input = splitSegments(path);
        final List<String> output = new ArrayList<String>();
        final int last = input.size() - 1;
        for (int i = 0; i <= last; i++) {
            final String segment = input.get(i);
            // skip "." and every empty segment except the final one
            if (".".equals(segment) || ("".equals(segment) && i < last)) {
                continue;
            }
            if ("..".equals(segment)) {
                if (hasAuthority
                        || (!output.isEmpty() && !"..".equals(output.get(output.size() - 1)))) {
                    // [].pop() doesn't fail, to replicate this we need to check
                    // that there is something to remove
                    if (!output.isEmpty()) {
                        output.remove(output.size() - 1);
                    }
                } else {
                    // leading ".." of a relative path is preserved
                    output.add("..");
                }
                continue;
            }
            output.add(segment);
        }

        final StringBuilder rval = new StringBuilder();
        if (path.startsWith("/")) {
            rval.append('/');
        }
        appendJoined(rval, output);
        return rval.toString();
    }

    /**
     * Relativizes an IRI against a base.
     *
     * @param baseobj
     *            the base, either a String (parsed with
     *            {@link #parse(String)}) or a JsonLdUrl; null returns the IRI
     *            unchanged.
     * @param iri
     *            the IRI to make relative.
     * @return the IRI relative to the base, {@code "./"} when nothing remains,
     *         or the IRI unchanged when it does not start with the base's
     *         scheme and authority.
     */
    public static String removeBase(Object baseobj, String iri) {
        if (baseobj == null) {
            return iri;
        }

        final JsonLdUrl base = (baseobj instanceof String) ? JsonLdUrl.parse((String) baseobj)
                : (JsonLdUrl) baseobj;

        // establish base root
        String root = "";
        if (!"".equals(base.href)) {
            root = base.protocol + "//" + base.authority;
        }
        // support network-path reference with empty base
        else if (iri.indexOf("//") != 0) {
            root = "//";
        }

        // IRI not relative to base
        if (!iri.startsWith(root)) {
            return iri;
        }

        // remove root from IRI and parse remainder
        final JsonLdUrl rel = JsonLdUrl.parse(iri.substring(root.length()));

        // remove path segments that match
        final List<String> baseSegments = splitSegments(base.normalizedPath);
        final List<String> iriSegments = splitSegments(rel.normalizedPath);
        while (!baseSegments.isEmpty() && !iriSegments.isEmpty()
                && baseSegments.get(0).equals(iriSegments.get(0))) {
            baseSegments.remove(0);
            iriSegments.remove(0);
        }

        // use '../' for each non-matching base segment
        final StringBuilder rval = new StringBuilder();
        if (!baseSegments.isEmpty()) {
            // The last remaining base segment never counts as a directory:
            // if the base ends with '/' it is the empty trailing segment, and
            // if it does not it is a file name rather than a path.
            baseSegments.remove(baseSegments.size() - 1);
            for (int i = 0; i < baseSegments.size(); ++i) {
                rval.append("../");
            }
        }

        // append remaining segments
        appendJoined(rval, iriSegments);

        // add query and hash (hash already carries its leading '#')
        if (!"".equals(rel.query)) {
            rval.append('?').append(rel.query);
        }
        if (!"".equals(rel.hash)) {
            rval.append(rel.hash);
        }

        if (rval.length() == 0) {
            return "./";
        }
        return rval.toString();
    }

    /**
     * Resolves a reference against a base URI.
     *
     * @param baseUri
     *            the base URI; when null the reference is returned as is.
     * @param pathToResolve
     *            the reference; when null or blank the base is returned as is.
     * @return the resolved URI string, or null when a
     *         {@link URISyntaxException} occurs. A reference rejected by
     *         {@link URI#resolve(String)} raises an unchecked
     *         {@link IllegalArgumentException} instead, which is not caught
     *         here.
     */
    public static String resolve(String baseUri, String pathToResolve) {
        // TODO: some input will need to be normalized to perform the expected
        // result with java
        // TODO: we can do this without using java URI!
        if (baseUri == null) {
            return pathToResolve;
        }
        if (pathToResolve == null || "".equals(pathToResolve.trim())) {
            return baseUri;
        }
        try {
            URI uri = new URI(baseUri);
            // URI#resolve drops base scheme for opaque URIs, https://github.com/jsonld-java/jsonld-java/issues/232
            if (uri.isOpaque()) {
                String basePath = uri.getPath() != null ? uri.getPath() : uri.getSchemeSpecificPart();
                // Drop the last segment, see https://tools.ietf.org/html/rfc3986#section-5.2.3 (2nd bullet point)
                basePath = basePath.contains("/") ? basePath.substring(0, basePath.lastIndexOf('/') + 1) : "";
                return new URI(uri.getScheme(), basePath + pathToResolve, null).toString();
            }
            // "a base URI [...] does not allow a fragment" (https://tools.ietf.org/html/rfc3986#section-4.3)
            uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), null);
            // query string parsing
            if (pathToResolve.startsWith("?")) {
                // drop query, https://tools.ietf.org/html/rfc3986#section-5.2.2: T.query = R.query;
                uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, null);
                // add query to the end manually (as URI#resolve does it wrong)
                return uri.toString() + pathToResolve;
            } else if (pathToResolve.startsWith("#")) {
                // add fragment to the end manually (as URI#resolve does it wrong)
                return uri.toString() + pathToResolve;
            }
            // A base with an authority and an empty path merges against "/"
            // (https://tools.ietf.org/html/rfc3986#section-5.2.3, 1st bullet point).
            // The components are inspected rather than the URI string so that a
            // query on the base neither hides the missing slash nor receives it.
            final String basePath = uri.getPath();
            if (uri.getAuthority() != null && (basePath == null || basePath.isEmpty())) {
                uri = new URI(uri.getScheme(), uri.getAuthority(), "/", uri.getQuery(), null);
            }
            uri = uri.resolve(pathToResolve);
            // java doesn't discard unnecessary dot segments
            String path = uri.getPath();
            if (path != null) {
                path = JsonLdUrl.removeDotSegments(path, true);
            }
            return new URI(uri.getScheme(), uri.getAuthority(), path, uri.getQuery(),
                    uri.getFragment()).toString();
        } catch (final URISyntaxException e) {
            return null;
        }
    }

    /**
     * Parses the authority for the pre-parsed given JsonLdUrl.
     *
     * @param parsed
     *            the pre-parsed JsonLdUrl; its authority (and possibly
     *            pathname) is updated in place.
     */
    private static void parseAuthority(JsonLdUrl parsed) {
        // parse authority for unparsed relative network-path reference
        if (parsed.href.indexOf(":") == -1 && parsed.href.indexOf("//") == 0
                && "".equals(parsed.host)) {
            // must parse authority from pathname; a pathname shorter than two
            // characters (e.g. for the input "//") yields "" instead of
            // throwing, matching JavaScript's substr
            // NOTE(review): this strips two characters even though the regex
            // has already consumed the leading "//" - confirm intent.
            parsed.pathname = parsed.pathname.length() >= 2 ? parsed.pathname.substring(2) : "";
            final int idx = parsed.pathname.indexOf("/");
            if (idx == -1) {
                parsed.authority = parsed.pathname;
                parsed.pathname = "";
            } else {
                parsed.authority = parsed.pathname.substring(0, idx);
                parsed.pathname = parsed.pathname.substring(idx);
            }
        } else {
            // construct authority: the regex capture stored in host already
            // spans userinfo, host name and port, so userinfo is only
            // prepended when host does not carry it yet
            parsed.authority = parsed.host;
            if (!"".equals(parsed.auth) && !parsed.host.startsWith(parsed.auth + "@")) {
                parsed.authority = parsed.auth + "@" + parsed.authority;
            }
        }
    }
}